/*
 * Meta performance counter support.
 *  Copyright (C) 2012 Imagination Technologies Ltd
 *
 * This code is based on the sh pmu code:
 *  Copyright (C) 2009 Paul Mundt
 *
 * and on the arm pmu code:
 *  Copyright (C) 2009 picoChip Designs, Ltd., James Iles
 *  Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/atomic.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqchip/metag.h>
#include <linux/perf_event.h>
#include <linux/slab.h>

#include <asm/core_reg.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/processor.h>

#include "perf_event.h"

static int _hw_perf_event_init(struct perf_event *);
static void _hw_perf_event_destroy(struct perf_event *);

/* Determines which core type we are */
static struct metag_pmu *metag_pmu __read_mostly;

/* Processor specific data */
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

/* PMU admin */
const char *perf_pmu_name(void)
{
        if (!metag_pmu)
                return NULL;

        return metag_pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);

int perf_num_counters(void)
{
        if (metag_pmu)
                return metag_pmu->max_events;

        return 0;
}
EXPORT_SYMBOL_GPL(perf_num_counters);

static inline int metag_pmu_initialised(void)
{
        return !!metag_pmu;
}

static void release_pmu_hardware(void)
{
        int irq;
        unsigned int version = (metag_pmu->version &
                        (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
                        METAC_ID_REV_S;

        /* Early cores don't have overflow interrupts */
        if (version < 0x0104)
                return;

        irq = internal_irq_map(17);
        if (irq >= 0)
                free_irq(irq, (void *)1);

        irq = internal_irq_map(16);
        if (irq >= 0)
                free_irq(irq, (void *)0);
}

static int reserve_pmu_hardware(void)
{
        int err = 0, irq[2];
        unsigned int version = (metag_pmu->version &
                        (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
                        METAC_ID_REV_S;

        /* Early cores don't have overflow interrupts */
        if (version < 0x0104)
                goto out;

        /*
         * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
         * similarly, 17 is the interrupt for performance counter 1.
         * We can't (yet) interrupt on the cycle counter, because it's a
         * register, however it holds a 32-bit value as opposed to 24-bit.
         */
        irq[0] = internal_irq_map(16);
        if (irq[0] < 0) {
                pr_err("unable to map internal IRQ %d\n", 16);
                goto out;
        }
        err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
                        "metagpmu0", (void *)0);
        if (err) {
                pr_err("unable to request IRQ%d for metag PMU counters\n",
                                irq[0]);
                goto out;
        }

        irq[1] = internal_irq_map(17);
        if (irq[1] < 0) {
                pr_err("unable to map internal IRQ %d\n", 17);
                goto out_irq1;
        }
        err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
                        "metagpmu1", (void *)1);
        if (err) {
                pr_err("unable to request IRQ%d for metag PMU counters\n",
                                irq[1]);
                goto out_irq1;
        }

        return 0;

out_irq1:
        free_irq(irq[0], (void *)0);
out:
        return err;
}

/* PMU operations */
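/*
 * The whole-PMU enable/disable callbacks are intentionally empty: counters
 * are programmed and torn down individually through metag_pmu->enable() and
 * metag_pmu->disable() from metag_pmu_start()/metag_pmu_stop() below.
 */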
static void metag_pmu_enable(struct pmu *pmu)
{
}

static void metag_pmu_disable(struct pmu *pmu)
{
}

static int metag_pmu_event_init(struct perf_event *event)
{
        int err = 0;
        atomic_t *active_events = &metag_pmu->active_events;

        if (!metag_pmu_initialised()) {
                err = -ENODEV;
                goto out;
        }

        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        event->destroy = _hw_perf_event_destroy;

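        /*
         * The first event to get this far reserves the PMU overflow IRQs;
         * _hw_perf_event_destroy() releases them again once the last active
         * event is torn down.
         */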
        if (!atomic_inc_not_zero(active_events)) {
                mutex_lock(&metag_pmu->reserve_mutex);
                if (atomic_read(active_events) == 0)
                        err = reserve_pmu_hardware();

                if (!err)
                        atomic_inc(active_events);

                mutex_unlock(&metag_pmu->reserve_mutex);
        }

        /* Hardware and cache counters */
        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
        case PERF_TYPE_HW_CACHE:
        case PERF_TYPE_RAW:
                err = _hw_perf_event_init(event);
                break;

        default:
                return -ENOENT;
        }

        if (err)
                event->destroy(event);

out:
        return err;
}

void metag_pmu_event_update(struct perf_event *event,
                struct hw_perf_event *hwc, int idx)
{
        u64 prev_raw_count, new_raw_count;
        s64 delta;

        /*
         * If this counter is chained, it may be that the previous counter
         * value has been changed beneath us.
         *
         * To get around this, we read and exchange the new raw count, then
         * add the delta (new - prev) to the generic counter atomically.
         *
         * Without interrupts, this is the simplest approach.
         */
again:
        prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = metag_pmu->read(idx);

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                        new_raw_count) != prev_raw_count)
                goto again;

        /*
         * Calculate the delta and add it to the counter.
         */
        delta = (new_raw_count - prev_raw_count) & MAX_PERIOD;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);
}

int metag_pmu_event_set_period(struct perf_event *event,
                struct hw_perf_event *hwc, int idx)
{
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        /* The period may have been changed */
        if (unlikely(period != hwc->last_period))
                left += period - hwc->last_period;

        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (left > (s64)metag_pmu->max_period)
                left = metag_pmu->max_period;

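        /*
         * Arm the counter for the remaining period: writing the two's
         * complement of 'left' (masked to the counter width by MAX_PERIOD)
         * makes the hardware overflow after 'left' more events.
         */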
        if (metag_pmu->write) {
                local64_set(&hwc->prev_count, -(s32)left);
                metag_pmu->write(idx, -left & MAX_PERIOD);
        }

        perf_event_update_userpage(event);

        return ret;
}

static void metag_pmu_start(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (WARN_ON_ONCE(idx == -1))
                return;

        /*
         * We always have to reprogram the period, so ignore PERF_EF_RELOAD.
         */
        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;

        /*
         * Reset the period.
         * Some counters can't be stopped (i.e. are core global), so when the
         * counter was 'stopped' we merely disabled the IRQ. If we don't reset
         * the period, then we'll either: a) get an overflow too soon;
         * or b) too late if the overflow happened since disabling.
         * Obviously, this has little bearing on cores without the overflow
         * interrupt, as the performance counter resets to zero on write
         * anyway.
         */
        if (metag_pmu->max_period)
                metag_pmu_event_set_period(event, hwc, hwc->idx);
        cpuc->events[idx] = event;
        metag_pmu->enable(hwc, idx);
}

static void metag_pmu_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        /*
         * We should always update the counter on stop; see comment above
         * why.
         */
        if (!(hwc->state & PERF_HES_STOPPED)) {
                metag_pmu_event_update(event, hwc, hwc->idx);
                metag_pmu->disable(hwc, hwc->idx);
                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static int metag_pmu_add(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = 0, ret = 0;

        perf_pmu_disable(event->pmu);

        /* check whether we're counting instructions */
        if (hwc->config == 0x100) {
                if (__test_and_set_bit(METAG_INST_COUNTER,
                                cpuc->used_mask)) {
                        ret = -EAGAIN;
                        goto out;
                }
                idx = METAG_INST_COUNTER;
        } else {
                /* Check whether we have a spare counter */
                idx = find_first_zero_bit(cpuc->used_mask,
                                atomic_read(&metag_pmu->active_events));
                if (idx >= METAG_INST_COUNTER) {
                        ret = -EAGAIN;
                        goto out;
                }

                __set_bit(idx, cpuc->used_mask);
        }
        hwc->idx = idx;

        /* Make sure the counter is disabled */
        metag_pmu->disable(hwc, idx);

        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        if (flags & PERF_EF_START)
                metag_pmu_start(event, PERF_EF_RELOAD);

        perf_event_update_userpage(event);
out:
        perf_pmu_enable(event->pmu);
        return ret;
}

static void metag_pmu_del(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        WARN_ON(idx < 0);
        metag_pmu_stop(event, PERF_EF_UPDATE);
        cpuc->events[idx] = NULL;
        __clear_bit(idx, cpuc->used_mask);

        perf_event_update_userpage(event);
}

static void metag_pmu_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        /* Don't read disabled counters! */
        if (hwc->idx < 0)
                return;

        metag_pmu_event_update(event, hwc, hwc->idx);
}

static struct pmu pmu = {
        .pmu_enable = metag_pmu_enable,
        .pmu_disable = metag_pmu_disable,

        .event_init = metag_pmu_event_init,

        .add = metag_pmu_add,
        .del = metag_pmu_del,
        .start = metag_pmu_start,
        .stop = metag_pmu_stop,
        .read = metag_pmu_read,
};

/* Core counter specific functions */
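/*
 * Entries of -1 mark generic events this PMU cannot count;
 * _hw_perf_event_init() rejects such mappings with -EINVAL.
 */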
static const int metag_general_events[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = 0x03,
        [PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
        [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
        [PERF_COUNT_HW_CACHE_MISSES] = -1,
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
        [PERF_COUNT_HW_BRANCH_MISSES] = -1,
        [PERF_COUNT_HW_BUS_CYCLES] = -1,
        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
        [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
        [PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
};

static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        [C(L1D)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = 0x08,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(L1I)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = 0x09,
                        [C(RESULT_MISS)] = 0x0a,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(LL)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(DTLB)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = 0xd0,
                        [C(RESULT_MISS)] = 0xd2,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = 0xd4,
                        [C(RESULT_MISS)] = 0xd5,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(ITLB)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = 0xd1,
                        [C(RESULT_MISS)] = 0xd3,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(BPU)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
        },
        [C(NODE)] = {
                [C(OP_READ)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
                [C(OP_PREFETCH)] = {
                        [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
                },
        },
};

static void _hw_perf_event_destroy(struct perf_event *event)
{
        atomic_t *active_events = &metag_pmu->active_events;
        struct mutex *pmu_mutex = &metag_pmu->reserve_mutex;

        if (atomic_dec_and_mutex_lock(active_events, pmu_mutex)) {
                release_pmu_hardware();
                mutex_unlock(pmu_mutex);
        }
}

static int _hw_perf_cache_event(int config, int *evp)
{
        unsigned long type, op, result;
        int ev;

        if (!metag_pmu->cache_events)
                return -EINVAL;

        /* Unpack config */
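        /*
         * The packing matches perf's generic cache-event encoding:
         * config = type | (op << 8) | (result << 16).
         */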
        type = config & 0xff;
        op = (config >> 8) & 0xff;
        result = (config >> 16) & 0xff;

        if (type >= PERF_COUNT_HW_CACHE_MAX ||
                        op >= PERF_COUNT_HW_CACHE_OP_MAX ||
                        result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return -EINVAL;

        ev = (*metag_pmu->cache_events)[type][op][result];
        if (ev == 0)
                return -EOPNOTSUPP;
        if (ev == -1)
                return -EINVAL;
        *evp = ev;
        return 0;
}

static int _hw_perf_event_init(struct perf_event *event)
{
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
        int mapping = 0, err;

        switch (attr->type) {
        case PERF_TYPE_HARDWARE:
                if (attr->config >= PERF_COUNT_HW_MAX)
                        return -EINVAL;

                mapping = metag_pmu->event_map(attr->config);
                break;

        case PERF_TYPE_HW_CACHE:
                err = _hw_perf_cache_event(attr->config, &mapping);
                if (err)
                        return err;
                break;

        case PERF_TYPE_RAW:
                mapping = attr->config;
                break;
        }

        /* Return early if the event is unsupported */
        if (mapping == -1)
                return -EINVAL;

        /*
         * Don't assign an index until the event is placed into the hardware.
         * -1 signifies that we're still deciding where to put it. On SMP
         * systems each core has its own set of counters, so we can't do any
         * constraint checking yet.
         */
        hwc->idx = -1;

        /* Store the event encoding */
        hwc->config |= (unsigned long)mapping;

        /*
         * For non-sampling runs, limit the sample_period to half of the
         * counter width. This way, the new counter value should be less
         * likely to overtake the previous one (unless there are IRQ latency
         * issues...)
         */
        if (metag_pmu->max_period) {
                if (!hwc->sample_period) {
                        hwc->sample_period = metag_pmu->max_period >> 1;
                        hwc->last_period = hwc->sample_period;
                        local64_set(&hwc->period_left, hwc->sample_period);
                }
        }

        return 0;
}

static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
{
        struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events);
        unsigned int config = event->config;
        unsigned int tmp = config & 0xf0;
        unsigned long flags;

        raw_spin_lock_irqsave(&events->pmu_lock, flags);

        /*
         * Check if we're enabling the instruction counter (index of
         * MAX_HWEVENTS - 1)
         */
        if (METAG_INST_COUNTER == idx) {
                WARN_ONCE((config != 0x100),
                        "invalid configuration (%d) for counter (%d)\n",
                        config, idx);
                local64_set(&event->prev_count, __core_reg_get(TXTACTCYC));
                goto unlock;
        }

        /* Check for a core internal or performance channel event. */
        if (tmp) {
                /* PERF_ICORE/PERF_CHAN only exist since Meta2 */
#ifdef METAC_2_1
                void *perf_addr;

                /*
                 * Anything other than a cycle count will write the low-
                 * nibble to the correct counter register.
                 */
                switch (tmp) {
                case 0xd0:
                        perf_addr = (void *)PERF_ICORE(idx);
                        break;

                case 0xf0:
                        perf_addr = (void *)PERF_CHAN(idx);
                        break;

                default:
                        perf_addr = NULL;
                        break;
                }

                if (perf_addr)
                        metag_out32((config & 0x0f), perf_addr);
#endif

                /*
                 * Now we use the high nibble as the performance event to
                 * count.
                 */
                config = tmp >> 4;
        }

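        /*
         * Bits 31:28 hold the event selector; bits 27:24 form the per-thread
         * mask, of which we set only this hardware thread's bit.
         */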
        tmp = ((config & 0xf) << 28) |
                        ((1 << 24) << hard_processor_id());
        if (metag_pmu->max_period)
                /*
                 * Cores supporting overflow interrupts may have had the
                 * counter set to a specific value that needs preserving.
                 */
                tmp |= metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
        else
                /*
                 * Older cores reset the counter on write, so prev_count needs
                 * resetting too so we can calculate a correct delta.
                 */
                local64_set(&event->prev_count, 0);

        metag_out32(tmp, PERF_COUNT(idx));
unlock:
        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}

static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
{
        struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events);
        unsigned int tmp = 0;
        unsigned long flags;

        /*
         * The cycle counter can't be disabled per se, as it's a hardware
         * thread register which is always counting. We merely return if this
         * is the counter we're attempting to disable.
         */
        if (METAG_INST_COUNTER == idx)
                return;

        /*
         * The counter value _should_ have been read prior to disabling,
         * as if we're running on an early core then the value gets reset to
         * 0, and any read after that would be useless. On the newer cores,
         * however, it's better to read-modify-update this for purposes of
         * the overflow interrupt.
         * Here we remove the thread id AND the event nibble (there are at
         * least two events that count events that are core global and ignore
         * the thread id mask). This only works because we don't mix thread
         * performance counts, and event 0x00 requires a thread id mask!
         */
        raw_spin_lock_irqsave(&events->pmu_lock, flags);

        tmp = metag_in32(PERF_COUNT(idx));
        tmp &= 0x00ffffff;
        metag_out32(tmp, PERF_COUNT(idx));

        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}

static u64 metag_pmu_read_counter(int idx)
{
        u32 tmp = 0;

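        /*
         * The instruction counter is the full 32-bit TXTACTCYC core
         * register; the other performance counters only hold 24 bits.
         */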
        if (METAG_INST_COUNTER == idx) {
                tmp = __core_reg_get(TXTACTCYC);
                goto out;
        }

        tmp = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
out:
        return tmp;
}

static void metag_pmu_write_counter(int idx, u32 val)
{
        struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events);
        u32 tmp = 0;
        unsigned long flags;

        /*
         * This _shouldn't_ happen, but if it does, then we can just
         * ignore the write, as the register is read-only and clear-on-write.
         */
        if (METAG_INST_COUNTER == idx)
                return;

        /*
         * We'll keep the thread mask and event id, and just update the
         * counter itself. Also, we should bound the value to 24 bits.
         */
        raw_spin_lock_irqsave(&events->pmu_lock, flags);

        val &= 0x00ffffff;
        tmp = metag_in32(PERF_COUNT(idx)) & 0xff000000;
        val |= tmp;
        metag_out32(val, PERF_COUNT(idx));

        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}

static int metag_pmu_event_map(int idx)
{
        return metag_general_events[idx];
}

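/*
 * Counter overflow IRQ handler. The dev_id registered in
 * reserve_pmu_hardware() is the counter index, recovered here from 'dev'.
 */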
static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
{
        int idx = (int)dev;
        struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
        struct perf_event *event = cpuhw->events[idx];
        struct hw_perf_event *hwc = &event->hw;
        struct pt_regs *regs = get_irq_regs();
        struct perf_sample_data sampledata;
        unsigned long flags;
        u32 counter = 0;

        /*
         * We need to stop the core temporarily from generating another
         * interrupt while we disable this counter. However, we don't want
         * to flag the counter as free.
         */
        __global_lock2(flags);
        counter = metag_in32(PERF_COUNT(idx));
        metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
        __global_unlock2(flags);

        /* Update the counts and reset the sample period */
        metag_pmu_event_update(event, hwc, idx);
        perf_sample_data_init(&sampledata, 0, hwc->last_period);
        metag_pmu_event_set_period(event, hwc, idx);

        /*
         * Enable the counter again once core overflow processing has
         * completed. Note the counter value may have been modified while it
         * was inactive to set it up ready for the next interrupt.
         */
        if (!perf_event_overflow(event, &sampledata, regs)) {
                __global_lock2(flags);
                counter = (counter & 0xff000000) |
                        (metag_in32(PERF_COUNT(idx)) & 0x00ffffff);
                metag_out32(counter, PERF_COUNT(idx));
                __global_unlock2(flags);
        }

        return IRQ_HANDLED;
}

static struct metag_pmu _metag_pmu = {
        .handle_irq = metag_pmu_counter_overflow,
        .enable = metag_pmu_enable_counter,
        .disable = metag_pmu_disable_counter,
        .read = metag_pmu_read_counter,
        .write = metag_pmu_write_counter,
        .event_map = metag_pmu_event_map,
        .cache_events = &metag_pmu_cache_events,
        .max_period = MAX_PERIOD,
        .max_events = MAX_HWEVENTS,
};

/* PMU CPU hotplug notifier */
static int metag_pmu_cpu_notify(struct notifier_block *b, unsigned long action,
                void *hcpu)
{
        unsigned int cpu = (unsigned int)hcpu;
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

        if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
                return NOTIFY_DONE;

        memset(cpuc, 0, sizeof(struct cpu_hw_events));
        raw_spin_lock_init(&cpuc->pmu_lock);

        return NOTIFY_OK;
}

static struct notifier_block metag_pmu_notifier = {
        .notifier_call = metag_pmu_cpu_notify,
};

/* PMU Initialisation */
static int __init init_hw_perf_events(void)
{
        int ret = 0, cpu;
        u32 version = *(u32 *)METAC_ID;
        int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
        int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
                        >> METAC_ID_REV_S;

        /* Not a Meta 2 core, so not supported */
        if (0x02 > major) {
                pr_info("no hardware counter support available\n");
                goto out;
        } else if (0x02 == major) {
                metag_pmu = &_metag_pmu;

                if (min_rev < 0x0104) {
                        /*
                         * A core without overflow interrupts, and clear-on-
                         * write counters.
                         */
                        metag_pmu->handle_irq = NULL;
                        metag_pmu->write = NULL;
                        metag_pmu->max_period = 0;
                }

                metag_pmu->name = "meta2";
                metag_pmu->version = version;
                metag_pmu->pmu = pmu;
        }

        pr_info("enabled with %s PMU driver, %d counters available\n",
                        metag_pmu->name, metag_pmu->max_events);

        /*
         * Early cores have "limited" counters - they have no overflow
         * interrupts - and so are unable to do sampling without extra work
         * and timer assistance.
         */
        if (metag_pmu->max_period == 0)
                metag_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

        /* Initialise the active events and reservation mutex */
        atomic_set(&metag_pmu->active_events, 0);
        mutex_init(&metag_pmu->reserve_mutex);

        /* Clear the counters */
        metag_out32(0, PERF_COUNT(0));
        metag_out32(0, PERF_COUNT(1));

        for_each_possible_cpu(cpu) {
                struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

                memset(cpuc, 0, sizeof(struct cpu_hw_events));
                raw_spin_lock_init(&cpuc->pmu_lock);
        }

        register_cpu_notifier(&metag_pmu_notifier);
        ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW);
out:
        return ret;
}
early_initcall(init_hw_perf_events);