Commit | Line | Data |
---|---|---|
b46a33e2 | 1 | /* |
058a9b43 | 2 | * SPDX-License-Identifier: MIT |
b46a33e2 | 3 | * |
058a9b43 | 4 | * Copyright © 2017-2018 Intel Corporation |
b46a33e2 TU |
5 | */ |
6 | ||
447ae316 | 7 | #include <linux/irq.h> |
3b4ed2e2 | 8 | #include <linux/pm_runtime.h> |
112ed2d3 CW |
9 | |
10 | #include "gt/intel_engine.h" | |
51fbd8de | 11 | #include "gt/intel_engine_pm.h" |
750e76b4 | 12 | #include "gt/intel_engine_user.h" |
51fbd8de | 13 | #include "gt/intel_gt_pm.h" |
c1132367 | 14 | #include "gt/intel_rc6.h" |
3e7abf81 | 15 | #include "gt/intel_rps.h" |
112ed2d3 | 16 | |
058a9b43 | 17 | #include "i915_drv.h" |
ecbb5fb7 JN |
18 | #include "i915_pmu.h" |
19 | #include "intel_pm.h" | |
b46a33e2 TU |
20 | |
/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
/* Timer period: 1/FREQUENCY seconds, but never below 10us. */
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

/* The engine sample types which are accumulated by the sampling timer. */
#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

/* Number of bits reserved for engine sample types in the enable bitmask. */
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

/* CPUs on which PMU events are allowed to run (single CPU at a time). */
static cpumask_t i915_pmu_cpumask;
b46a33e2 TU |
33 | |
/* Extract the sample type (busy/wait/sema) from an engine event config. */
static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

/* Sample type of an engine perf event, taken from its config field. */
static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

/* Engine class encoded in config bits [I915_PMU_CLASS_SHIFT, +8). */
static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

/* Engine instance encoded in config bits [I915_PMU_SAMPLE_BITS, +8). */
static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

/* Engine events occupy the config space below __I915_PMU_OTHER(0). */
static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}
58 | ||
59 | static unsigned int config_enabled_bit(u64 config) | |
60 | { | |
61 | if (is_engine_config(config)) | |
62 | return engine_config_sample(config); | |
63 | else | |
64 | return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0)); | |
65 | } | |
66 | ||
/* Single-bit mask form of config_enabled_bit(). */
static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

/* True if the perf event targets a per-engine counter. */
static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

/* Bit position of this event in the pmu->enable bitmask. */
static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}
81 | ||
908091c8 | 82 | static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) |
feff0dc6 | 83 | { |
908091c8 | 84 | struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); |
feff0dc6 TU |
85 | u64 enable; |
86 | ||
87 | /* | |
88 | * Only some counters need the sampling timer. | |
89 | * | |
90 | * We start with a bitmask of all currently enabled events. | |
91 | */ | |
908091c8 | 92 | enable = pmu->enable; |
feff0dc6 TU |
93 | |
94 | /* | |
95 | * Mask out all the ones which do not need the timer, or in | |
96 | * other words keep all the ones that could need the timer. | |
97 | */ | |
98 | enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | | |
99 | config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) | | |
100 | ENGINE_SAMPLE_MASK; | |
101 | ||
102 | /* | |
103 | * When the GPU is idle per-engine counters do not need to be | |
104 | * running so clear those bits out. | |
105 | */ | |
106 | if (!gpu_active) | |
107 | enable &= ~ENGINE_SAMPLE_MASK; | |
b3add01e TU |
108 | /* |
109 | * Also there is software busyness tracking available we do not | |
110 | * need the timer for I915_SAMPLE_BUSY counter. | |
111 | */ | |
bf73fc0f | 112 | else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS) |
b3add01e | 113 | enable &= ~BIT(I915_SAMPLE_BUSY); |
feff0dc6 TU |
114 | |
115 | /* | |
116 | * If some bits remain it means we need the sampling timer running. | |
117 | */ | |
118 | return enable; | |
119 | } | |
120 | ||
/*
 * Read the total RC6 residency (in ns) from the hardware counters.
 * Sums RC6 with the deeper RC6p/RC6pp states where the platform has them.
 * Requires the device to be awake; callers handle runtime-PM.
 */
static u64 __get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	u64 val;

	/* VLV uses a different register for the base RC6 counter. */
	val = intel_rc6_residency_ns(&gt->rc6,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);

	return val;
}
139 | ||
140 | #if IS_ENABLED(CONFIG_PM) | |
141 | ||
/* Nanoseconds elapsed since the given monotonic timestamp. */
static inline s64 ktime_since(const ktime_t kt)
{
	return ktime_to_ns(ktime_sub(ktime_get(), kt));
}
146 | ||
16ffe73c CW |
/*
 * Report the RC6 counter value for the PMU.
 *
 * If the device is awake the hardware counter is read directly. If it is
 * runtime suspended the value is estimated: suspended time counts fully as
 * RC6 on top of the last real reading taken at suspend (see park_rc6()).
 * The result is clamped to be monotonic across awake/suspend transitions.
 */
static u64 get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;
	bool awake = false;
	u64 val;

	if (intel_gt_pm_get_if_awake(gt)) {
		val = __get_rc6(gt);
		intel_gt_pm_put_async(gt);
		awake = true;
	}

	spin_lock_irqsave(&pmu->lock, flags);

	if (awake) {
		pmu->sample[__I915_SAMPLE_RC6].cur = val;
	} else {
		/*
		 * We think we are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		val = ktime_since(pmu->sleep_last);
		val += pmu->sample[__I915_SAMPLE_RC6].cur;
	}

	/* Never report a value lower than previously reported (monotonic). */
	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
	else
		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;

	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;
}
186 | ||
/*
 * Called on GT park: snapshot the real RC6 counter (if the event is
 * enabled) and record the park timestamp, so get_rc6() can extrapolate
 * while the device is runtime suspended.
 */
static void park_rc6(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);

	pmu->sleep_last = ktime_get();
}
196 | ||
16ffe73c CW |
197 | #else |
198 | ||
/* Without CONFIG_PM the device never runtime suspends: read HW directly. */
static u64 get_rc6(struct intel_gt *gt)
{
	return __get_rc6(gt);
}

/* No suspend-time extrapolation needed without CONFIG_PM. */
static void park_rc6(struct drm_i915_private *i915) {}
16ffe73c CW |
205 | |
206 | #endif | |
207 | ||
908091c8 | 208 | static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) |
feff0dc6 | 209 | { |
908091c8 TU |
210 | if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) { |
211 | pmu->timer_enabled = true; | |
212 | pmu->timer_last = ktime_get(); | |
213 | hrtimer_start_range_ns(&pmu->timer, | |
feff0dc6 TU |
214 | ns_to_ktime(PERIOD), 0, |
215 | HRTIMER_MODE_REL_PINNED); | |
216 | } | |
217 | } | |
218 | ||
16ffe73c CW |
/*
 * Notification that the GT has been parked (gone idle). Snapshots RC6
 * state and stops the sampling timer if only engine events remain enabled.
 */
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	/* PMU was not registered (event_init unset) - nothing to do. */
	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	park_rc6(i915);

	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	pmu->timer_enabled = pmu_needs_timer(pmu, false);

	spin_unlock_irq(&pmu->lock);
}
238 | ||
feff0dc6 TU |
/*
 * Notification that the GT has been unparked (become active). Restarts
 * the sampling timer if any enabled event needs it.
 */
void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	/* PMU was not registered (event_init unset) - nothing to do. */
	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	spin_unlock_irq(&pmu->lock);
}
255 | ||
b46a33e2 | 256 | static void |
9f473ecf | 257 | add_sample(struct i915_pmu_sample *sample, u32 val) |
b46a33e2 | 258 | { |
9f473ecf | 259 | sample->cur += val; |
b46a33e2 TU |
260 | } |
261 | ||
d79e1bd6 CW |
/*
 * True when mmio reads must be serialised under the uncore lock to avoid
 * concurrent cache line access hanging the machine (gen7 erratum).
 */
static bool exclusive_mmio_access(const struct drm_i915_private *i915)
{
	/*
	 * We have to avoid concurrent mmio cache line access on gen7 or
	 * risk a machine hang. For a fun history lesson dig out the old
	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
	 */
	return IS_GEN(i915, 7);
}
271 | ||
9f473ecf | 272 | static void |
08ce5c64 | 273 | engines_sample(struct intel_gt *gt, unsigned int period_ns) |
b46a33e2 | 274 | { |
08ce5c64 | 275 | struct drm_i915_private *i915 = gt->i915; |
b46a33e2 TU |
276 | struct intel_engine_cs *engine; |
277 | enum intel_engine_id id; | |
b46a33e2 | 278 | |
28fba096 | 279 | if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) |
b46a33e2 TU |
280 | return; |
281 | ||
edb1ecad CW |
282 | if (!intel_gt_pm_is_awake(gt)) |
283 | return; | |
284 | ||
c6e07ada | 285 | for_each_engine(engine, gt, id) { |
d0aa694b | 286 | struct intel_engine_pmu *pmu = &engine->pmu; |
d79e1bd6 | 287 | spinlock_t *mmio_lock; |
51fbd8de | 288 | unsigned long flags; |
d0aa694b | 289 | bool busy; |
b46a33e2 TU |
290 | u32 val; |
291 | ||
51fbd8de CW |
292 | if (!intel_engine_pm_get_if_awake(engine)) |
293 | continue; | |
294 | ||
d79e1bd6 CW |
295 | mmio_lock = NULL; |
296 | if (exclusive_mmio_access(i915)) | |
297 | mmio_lock = &engine->uncore->lock; | |
298 | ||
299 | if (unlikely(mmio_lock)) | |
300 | spin_lock_irqsave(mmio_lock, flags); | |
51fbd8de | 301 | |
28fba096 | 302 | val = ENGINE_READ_FW(engine, RING_CTL); |
d0aa694b | 303 | if (val == 0) /* powerwell off => engine idle */ |
51fbd8de | 304 | goto skip; |
b46a33e2 | 305 | |
9f473ecf | 306 | if (val & RING_WAIT) |
d0aa694b | 307 | add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns); |
9f473ecf | 308 | if (val & RING_WAIT_SEMAPHORE) |
d0aa694b CW |
309 | add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); |
310 | ||
54fc577d TU |
311 | /* No need to sample when busy stats are supported. */ |
312 | if (intel_engine_supports_stats(engine)) | |
313 | goto skip; | |
314 | ||
d0aa694b CW |
315 | /* |
316 | * While waiting on a semaphore or event, MI_MODE reports the | |
317 | * ring as idle. However, previously using the seqno, and with | |
318 | * execlists sampling, we account for the ring waiting as the | |
319 | * engine being busy. Therefore, we record the sample as being | |
320 | * busy if either waiting or !idle. | |
321 | */ | |
322 | busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT); | |
323 | if (!busy) { | |
28fba096 | 324 | val = ENGINE_READ_FW(engine, RING_MI_MODE); |
d0aa694b CW |
325 | busy = !(val & MODE_IDLE); |
326 | } | |
327 | if (busy) | |
328 | add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); | |
b46a33e2 | 329 | |
51fbd8de | 330 | skip: |
d79e1bd6 CW |
331 | if (unlikely(mmio_lock)) |
332 | spin_unlock_irqrestore(mmio_lock, flags); | |
07779a76 | 333 | intel_engine_pm_put_async(engine); |
51fbd8de | 334 | } |
b46a33e2 TU |
335 | } |
336 | ||
9f473ecf TU |
/* Accumulate @val * @mul into a software sample counter (64-bit product). */
static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
	sample->cur += mul_u32_u32(val, mul);
}
342 | ||
b66ecd04 TU |
343 | static bool frequency_sampling_enabled(struct i915_pmu *pmu) |
344 | { | |
345 | return pmu->enable & | |
346 | (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | | |
347 | config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)); | |
348 | } | |
349 | ||
9f473ecf | 350 | static void |
08ce5c64 | 351 | frequency_sample(struct intel_gt *gt, unsigned int period_ns) |
b46a33e2 | 352 | { |
08ce5c64 TU |
353 | struct drm_i915_private *i915 = gt->i915; |
354 | struct intel_uncore *uncore = gt->uncore; | |
355 | struct i915_pmu *pmu = &i915->pmu; | |
3e7abf81 | 356 | struct intel_rps *rps = >->rps; |
08ce5c64 | 357 | |
b66ecd04 TU |
358 | if (!frequency_sampling_enabled(pmu)) |
359 | return; | |
360 | ||
361 | /* Report 0/0 (actual/requested) frequency while parked. */ | |
362 | if (!intel_gt_pm_get_if_awake(gt)) | |
363 | return; | |
364 | ||
08ce5c64 | 365 | if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { |
b46a33e2 TU |
366 | u32 val; |
367 | ||
b66ecd04 TU |
368 | /* |
369 | * We take a quick peek here without using forcewake | |
370 | * so that we don't perturb the system under observation | |
371 | * (forcewake => !rc6 => increased power use). We expect | |
372 | * that if the read fails because it is outside of the | |
373 | * mmio power well, then it will return 0 -- in which | |
374 | * case we assume the system is running at the intended | |
375 | * frequency. Fortunately, the read should rarely fail! | |
376 | */ | |
377 | val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1); | |
378 | if (val) | |
e03512ed | 379 | val = intel_rps_get_cagf(rps, val); |
b66ecd04 TU |
380 | else |
381 | val = rps->cur_freq; | |
b46a33e2 | 382 | |
08ce5c64 | 383 | add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], |
b66ecd04 | 384 | intel_gpu_freq(rps, val), period_ns / 1000); |
b46a33e2 TU |
385 | } |
386 | ||
08ce5c64 TU |
387 | if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { |
388 | add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], | |
3e7abf81 | 389 | intel_gpu_freq(rps, rps->cur_freq), |
9f473ecf | 390 | period_ns / 1000); |
b46a33e2 | 391 | } |
b66ecd04 TU |
392 | |
393 | intel_gt_pm_put_async(gt); | |
b46a33e2 TU |
394 | } |
395 | ||
/*
 * The sampling hrtimer callback: measures the real elapsed period since
 * the last tick and feeds it to the engine and frequency samplers, then
 * re-arms itself unless sampling has been disabled.
 */
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_gt *gt = &i915->gt;
	unsigned int period_ns;
	ktime_t now;

	if (!READ_ONCE(pmu->timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
	pmu->timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculation, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer call-
	 * back delay greatly dominates this so we keep it simple.
	 */
	engines_sample(gt, period_ns);
	frequency_sample(gt, period_ns);

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}
425 | ||
0cd4684d TU |
/* Total number of device interrupts serviced, summed over all CPUs. */
static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	/* Interrupt may not be wired up (yet); report zero in that case. */
	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}
441 | ||
b2f78cda TU |
/*
 * Teardown for per-engine events: release the busy-stats reference taken
 * by engine_event_init() for BUSY events on engines with stats support.
 */
static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	/* The engine existed at event_init time; disappearing is a bug. */
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}
458 | ||
b46a33e2 TU |
/* perf core destroy callback; only set on parent (non-inherited) events. */
static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}
466 | ||
109ec558 TU |
/*
 * Validate an engine sample type for the given engine.
 * Returns 0 if supported, -ENODEV if the hardware lacks it (semaphores
 * before gen6), or -ENOENT for an unknown sample type.
 */
static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
485 | ||
/*
 * Validate a non-engine event config against the hardware capabilities.
 * Returns 0 if supported, -ENODEV if this platform lacks the counter,
 * or -ENOENT for an unknown config.
 */
static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
511 | ||
109ec558 TU |
/*
 * Validate and prepare a per-engine perf event. Takes a busy-stats
 * reference for BUSY events on engines that support software busyness
 * tracking (released in engine_event_destroy()).
 */
static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}
535 | ||
b46a33e2 TU |
/*
 * perf core event_init callback: reject unsupported perf modes, restrict
 * events to the designated CPU, then validate the config against either
 * the engine or non-engine config space.
 */
static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	/* Only the parent event owns the destroy callback. */
	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}
571 | ||
ad055fb8 | 572 | static u64 __i915_pmu_event_read(struct perf_event *event) |
b46a33e2 TU |
573 | { |
574 | struct drm_i915_private *i915 = | |
575 | container_of(event->pmu, typeof(*i915), pmu.base); | |
908091c8 | 576 | struct i915_pmu *pmu = &i915->pmu; |
b46a33e2 TU |
577 | u64 val = 0; |
578 | ||
579 | if (is_engine_event(event)) { | |
580 | u8 sample = engine_event_sample(event); | |
581 | struct intel_engine_cs *engine; | |
582 | ||
583 | engine = intel_engine_lookup_user(i915, | |
584 | engine_event_class(event), | |
585 | engine_event_instance(event)); | |
586 | ||
587 | if (WARN_ON_ONCE(!engine)) { | |
588 | /* Do nothing */ | |
b3add01e | 589 | } else if (sample == I915_SAMPLE_BUSY && |
b2f78cda | 590 | intel_engine_supports_stats(engine)) { |
b3add01e | 591 | val = ktime_to_ns(intel_engine_get_busy_time(engine)); |
b46a33e2 TU |
592 | } else { |
593 | val = engine->pmu.sample[sample].cur; | |
594 | } | |
595 | } else { | |
596 | switch (event->attr.config) { | |
597 | case I915_PMU_ACTUAL_FREQUENCY: | |
598 | val = | |
908091c8 | 599 | div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur, |
9f473ecf | 600 | USEC_PER_SEC /* to MHz */); |
b46a33e2 TU |
601 | break; |
602 | case I915_PMU_REQUESTED_FREQUENCY: | |
603 | val = | |
908091c8 | 604 | div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur, |
9f473ecf | 605 | USEC_PER_SEC /* to MHz */); |
b46a33e2 | 606 | break; |
0cd4684d TU |
607 | case I915_PMU_INTERRUPTS: |
608 | val = count_interrupts(i915); | |
609 | break; | |
6060b6ae | 610 | case I915_PMU_RC6_RESIDENCY: |
518ea582 | 611 | val = get_rc6(&i915->gt); |
6060b6ae | 612 | break; |
b46a33e2 TU |
613 | } |
614 | } | |
615 | ||
616 | return val; | |
617 | } | |
618 | ||
/*
 * perf core read callback: publish the delta since the last read into
 * event->count. The cmpxchg loop makes the prev_count update safe against
 * concurrent readers without taking a lock.
 */
static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}
633 | ||
/*
 * Enable counting for an event: bump the global (and, for engine events,
 * per-engine) reference counts and enable bits, start the sampling timer
 * if now required, and latch the current counter value as the baseline
 * for delta reporting.
 */
static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;

	spin_lock_irqsave(&pmu->lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == ~0);
	pmu->enable |= BIT_ULL(bit);
	pmu->enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
			     I915_ENGINE_SAMPLE_COUNT);
		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
			     I915_ENGINE_SAMPLE_COUNT);
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

		engine->pmu.enable |= BIT(sample);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&pmu->lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}
692 | ||
/*
 * Disable counting for an event: drop the per-engine (if applicable) and
 * global reference counts, clearing the corresponding enable bits when the
 * last listener goes away, and re-evaluate whether the sampling timer may
 * stop.
 */
static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;

	spin_lock_irqsave(&pmu->lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--pmu->enable_count[bit] == 0) {
		pmu->enable &= ~BIT_ULL(bit);
		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
	}

	spin_unlock_irqrestore(&pmu->lock, flags);
}
736 | ||
/* perf core start callback: begin counting and mark the event running. */
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

/* perf core stop callback: flush the count if asked, then stop counting. */
static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

/* perf core add callback: attach the event, optionally starting it. */
static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

/* perf core del callback: stop with a final count update. */
static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

/* No hardware counter index concept for this PMU. */
static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}
768 | ||
b7d3aabf CW |
/* Device attribute carrying a constant string shown via sysfs. */
struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

/* sysfs show for "format" attributes: emit the stored string. */
static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

/* Build an anonymous i915_str_attribute and yield its struct attribute. */
#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};
798 | ||
b7d3aabf CW |
/* Device attribute carrying an event config value shown via sysfs. */
struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

/* sysfs show for "events" attributes: emit the event's config encoding. */
static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};
817 | ||
/* sysfs show for "cpumask": the CPUs events are allowed to run on. */
static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

/* All sysfs attribute groups exposed by the PMU device. */
static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};
843 | ||
109ec558 TU |
/* Table-entry initialisers used by create_event_attributes(). */
#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

/*
 * Fill in one i915_ext_attribute (config-style event attribute) and
 * return a pointer to the next free slot in the array.
 */
static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

/*
 * Fill in one perf_pmu_events_attr (string-valued event attribute, e.g.
 * a unit) and return a pointer to the next free slot in the array.
 */
static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}
881 | ||
882 | static struct attribute ** | |
908091c8 | 883 | create_event_attributes(struct i915_pmu *pmu) |
109ec558 | 884 | { |
908091c8 | 885 | struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); |
109ec558 TU |
886 | static const struct { |
887 | u64 config; | |
888 | const char *name; | |
889 | const char *unit; | |
890 | } events[] = { | |
e88866ef CW |
891 | __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"), |
892 | __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"), | |
109ec558 TU |
893 | __event(I915_PMU_INTERRUPTS, "interrupts", NULL), |
894 | __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), | |
895 | }; | |
896 | static const struct { | |
897 | enum drm_i915_pmu_engine_sample sample; | |
898 | char *name; | |
899 | } engine_events[] = { | |
900 | __engine_event(I915_SAMPLE_BUSY, "busy"), | |
901 | __engine_event(I915_SAMPLE_SEMA, "sema"), | |
902 | __engine_event(I915_SAMPLE_WAIT, "wait"), | |
903 | }; | |
904 | unsigned int count = 0; | |
905 | struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter; | |
906 | struct i915_ext_attribute *i915_attr = NULL, *i915_iter; | |
907 | struct attribute **attr = NULL, **attr_iter; | |
908 | struct intel_engine_cs *engine; | |
109ec558 TU |
909 | unsigned int i; |
910 | ||
911 | /* Count how many counters we will be exposing. */ | |
912 | for (i = 0; i < ARRAY_SIZE(events); i++) { | |
913 | if (!config_status(i915, events[i].config)) | |
914 | count++; | |
915 | } | |
916 | ||
750e76b4 | 917 | for_each_uabi_engine(engine, i915) { |
109ec558 TU |
918 | for (i = 0; i < ARRAY_SIZE(engine_events); i++) { |
919 | if (!engine_event_status(engine, | |
920 | engine_events[i].sample)) | |
921 | count++; | |
922 | } | |
923 | } | |
924 | ||
925 | /* Allocate attribute objects and table. */ | |
dd5fec87 | 926 | i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL); |
109ec558 TU |
927 | if (!i915_attr) |
928 | goto err_alloc; | |
929 | ||
dd5fec87 | 930 | pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL); |
109ec558 TU |
931 | if (!pmu_attr) |
932 | goto err_alloc; | |
933 | ||
934 | /* Max one pointer of each attribute type plus a termination entry. */ | |
dd5fec87 | 935 | attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL); |
109ec558 TU |
936 | if (!attr) |
937 | goto err_alloc; | |
938 | ||
939 | i915_iter = i915_attr; | |
940 | pmu_iter = pmu_attr; | |
941 | attr_iter = attr; | |
942 | ||
943 | /* Initialize supported non-engine counters. */ | |
944 | for (i = 0; i < ARRAY_SIZE(events); i++) { | |
945 | char *str; | |
946 | ||
947 | if (config_status(i915, events[i].config)) | |
948 | continue; | |
949 | ||
950 | str = kstrdup(events[i].name, GFP_KERNEL); | |
951 | if (!str) | |
952 | goto err; | |
953 | ||
954 | *attr_iter++ = &i915_iter->attr.attr; | |
955 | i915_iter = add_i915_attr(i915_iter, str, events[i].config); | |
956 | ||
957 | if (events[i].unit) { | |
958 | str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name); | |
959 | if (!str) | |
960 | goto err; | |
961 | ||
962 | *attr_iter++ = &pmu_iter->attr.attr; | |
963 | pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit); | |
964 | } | |
965 | } | |
966 | ||
967 | /* Initialize supported engine counters. */ | |
750e76b4 | 968 | for_each_uabi_engine(engine, i915) { |
109ec558 TU |
969 | for (i = 0; i < ARRAY_SIZE(engine_events); i++) { |
970 | char *str; | |
971 | ||
972 | if (engine_event_status(engine, | |
973 | engine_events[i].sample)) | |
974 | continue; | |
975 | ||
976 | str = kasprintf(GFP_KERNEL, "%s-%s", | |
977 | engine->name, engine_events[i].name); | |
978 | if (!str) | |
979 | goto err; | |
980 | ||
981 | *attr_iter++ = &i915_iter->attr.attr; | |
982 | i915_iter = | |
983 | add_i915_attr(i915_iter, str, | |
8810bc56 | 984 | __I915_PMU_ENGINE(engine->uabi_class, |
750e76b4 | 985 | engine->uabi_instance, |
109ec558 TU |
986 | engine_events[i].sample)); |
987 | ||
988 | str = kasprintf(GFP_KERNEL, "%s-%s.unit", | |
989 | engine->name, engine_events[i].name); | |
990 | if (!str) | |
991 | goto err; | |
992 | ||
993 | *attr_iter++ = &pmu_iter->attr.attr; | |
994 | pmu_iter = add_pmu_attr(pmu_iter, str, "ns"); | |
995 | } | |
996 | } | |
997 | ||
908091c8 TU |
998 | pmu->i915_attr = i915_attr; |
999 | pmu->pmu_attr = pmu_attr; | |
109ec558 TU |
1000 | |
1001 | return attr; | |
1002 | ||
1003 | err:; | |
1004 | for (attr_iter = attr; *attr_iter; attr_iter++) | |
1005 | kfree((*attr_iter)->name); | |
1006 | ||
1007 | err_alloc: | |
1008 | kfree(attr); | |
1009 | kfree(i915_attr); | |
1010 | kfree(pmu_attr); | |
1011 | ||
1012 | return NULL; | |
1013 | } | |
1014 | ||
908091c8 | 1015 | static void free_event_attributes(struct i915_pmu *pmu) |
109ec558 TU |
1016 | { |
1017 | struct attribute **attr_iter = i915_pmu_events_attr_group.attrs; | |
1018 | ||
1019 | for (; *attr_iter; attr_iter++) | |
1020 | kfree((*attr_iter)->name); | |
1021 | ||
1022 | kfree(i915_pmu_events_attr_group.attrs); | |
908091c8 TU |
1023 | kfree(pmu->i915_attr); |
1024 | kfree(pmu->pmu_attr); | |
109ec558 TU |
1025 | |
1026 | i915_pmu_events_attr_group.attrs = NULL; | |
908091c8 TU |
1027 | pmu->i915_attr = NULL; |
1028 | pmu->pmu_attr = NULL; | |
109ec558 TU |
1029 | } |
1030 | ||
b46a33e2 TU |
1031 | static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) |
1032 | { | |
1033 | struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); | |
b46a33e2 TU |
1034 | |
1035 | GEM_BUG_ON(!pmu->base.event_init); | |
1036 | ||
b46a33e2 | 1037 | /* Select the first online CPU as a designated reader. */ |
0426c046 | 1038 | if (!cpumask_weight(&i915_pmu_cpumask)) |
b46a33e2 TU |
1039 | cpumask_set_cpu(cpu, &i915_pmu_cpumask); |
1040 | ||
1041 | return 0; | |
1042 | } | |
1043 | ||
1044 | static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) | |
1045 | { | |
1046 | struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); | |
1047 | unsigned int target; | |
1048 | ||
1049 | GEM_BUG_ON(!pmu->base.event_init); | |
1050 | ||
1051 | if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) { | |
1052 | target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); | |
1053 | /* Migrate events if there is a valid target */ | |
1054 | if (target < nr_cpu_ids) { | |
1055 | cpumask_set_cpu(target, &i915_pmu_cpumask); | |
1056 | perf_pmu_migrate_context(&pmu->base, cpu, target); | |
1057 | } | |
1058 | } | |
1059 | ||
1060 | return 0; | |
1061 | } | |
1062 | ||
1063 | static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; | |
b46a33e2 | 1064 | |
908091c8 | 1065 | static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) |
b46a33e2 | 1066 | { |
b46a33e2 TU |
1067 | enum cpuhp_state slot; |
1068 | int ret; | |
1069 | ||
1070 | ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, | |
1071 | "perf/x86/intel/i915:online", | |
1072 | i915_pmu_cpu_online, | |
1073 | i915_pmu_cpu_offline); | |
1074 | if (ret < 0) | |
1075 | return ret; | |
1076 | ||
1077 | slot = ret; | |
908091c8 | 1078 | ret = cpuhp_state_add_instance(slot, &pmu->node); |
b46a33e2 TU |
1079 | if (ret) { |
1080 | cpuhp_remove_multi_state(slot); | |
1081 | return ret; | |
1082 | } | |
1083 | ||
1084 | cpuhp_slot = slot; | |
b46a33e2 TU |
1085 | return 0; |
1086 | } | |
1087 | ||
908091c8 | 1088 | static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) |
b46a33e2 | 1089 | { |
b46a33e2 | 1090 | WARN_ON(cpuhp_slot == CPUHP_INVALID); |
908091c8 | 1091 | WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node)); |
b46a33e2 | 1092 | cpuhp_remove_multi_state(cpuhp_slot); |
b46a33e2 TU |
1093 | } |
1094 | ||
05488673 TU |
1095 | static bool is_igp(struct drm_i915_private *i915) |
1096 | { | |
1097 | struct pci_dev *pdev = i915->drm.pdev; | |
1098 | ||
1099 | /* IGP is 0000:00:02.0 */ | |
1100 | return pci_domain_nr(pdev->bus) == 0 && | |
1101 | pdev->bus->number == 0 && | |
1102 | PCI_SLOT(pdev->devfn) == 2 && | |
1103 | PCI_FUNC(pdev->devfn) == 0; | |
1104 | } | |
1105 | ||
b46a33e2 TU |
1106 | void i915_pmu_register(struct drm_i915_private *i915) |
1107 | { | |
908091c8 | 1108 | struct i915_pmu *pmu = &i915->pmu; |
fb26eee0 | 1109 | int ret = -ENOMEM; |
b46a33e2 TU |
1110 | |
1111 | if (INTEL_GEN(i915) <= 2) { | |
88f8065c | 1112 | dev_info(i915->drm.dev, "PMU not supported for this GPU."); |
b46a33e2 TU |
1113 | return; |
1114 | } | |
1115 | ||
908091c8 TU |
1116 | spin_lock_init(&pmu->lock); |
1117 | hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
1118 | pmu->timer.function = i915_sample; | |
1119 | ||
aebf3b52 | 1120 | if (!is_igp(i915)) { |
05488673 | 1121 | pmu->name = kasprintf(GFP_KERNEL, |
aebf3b52 | 1122 | "i915_%s", |
05488673 | 1123 | dev_name(i915->drm.dev)); |
aebf3b52 TU |
1124 | if (pmu->name) { |
1125 | /* tools/perf reserves colons as special. */ | |
1126 | strreplace((char *)pmu->name, ':', '_'); | |
1127 | } | |
1128 | } else { | |
05488673 | 1129 | pmu->name = "i915"; |
aebf3b52 | 1130 | } |
05488673 | 1131 | if (!pmu->name) |
b46a33e2 TU |
1132 | goto err; |
1133 | ||
c442292a CW |
1134 | i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); |
1135 | if (!i915_pmu_events_attr_group.attrs) | |
1136 | goto err_name; | |
1137 | ||
1138 | pmu->base.attr_groups = i915_pmu_attr_groups; | |
1139 | pmu->base.task_ctx_nr = perf_invalid_context; | |
1140 | pmu->base.event_init = i915_pmu_event_init; | |
1141 | pmu->base.add = i915_pmu_event_add; | |
1142 | pmu->base.del = i915_pmu_event_del; | |
1143 | pmu->base.start = i915_pmu_event_start; | |
1144 | pmu->base.stop = i915_pmu_event_stop; | |
1145 | pmu->base.read = i915_pmu_event_read; | |
1146 | pmu->base.event_idx = i915_pmu_event_event_idx; | |
1147 | ||
05488673 TU |
1148 | ret = perf_pmu_register(&pmu->base, pmu->name, -1); |
1149 | if (ret) | |
c442292a | 1150 | goto err_attr; |
05488673 | 1151 | |
908091c8 | 1152 | ret = i915_pmu_register_cpuhp_state(pmu); |
b46a33e2 TU |
1153 | if (ret) |
1154 | goto err_unreg; | |
1155 | ||
1156 | return; | |
1157 | ||
1158 | err_unreg: | |
908091c8 | 1159 | perf_pmu_unregister(&pmu->base); |
c442292a CW |
1160 | err_attr: |
1161 | pmu->base.event_init = NULL; | |
1162 | free_event_attributes(pmu); | |
05488673 TU |
1163 | err_name: |
1164 | if (!is_igp(i915)) | |
1165 | kfree(pmu->name); | |
b46a33e2 | 1166 | err: |
c442292a | 1167 | dev_notice(i915->drm.dev, "Failed to register PMU!\n"); |
b46a33e2 TU |
1168 | } |
1169 | ||
/*
 * i915_pmu_unregister - unwind i915_pmu_register().
 *
 * Safe to call when registration never succeeded: event_init is only
 * set on a fully registered PMU, so we bail out early in that case.
 */
void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	WARN_ON(pmu->enable); /* no events should still be enabled */

	hrtimer_cancel(&pmu->timer);

	i915_pmu_unregister_cpuhp_state(pmu);

	perf_pmu_unregister(&pmu->base);
	pmu->base.event_init = NULL;
	if (!is_igp(i915))
		kfree(pmu->name); /* the static "i915" name is not allocated */
	free_event_attributes(pmu);
}