2 * Meta performance counter support.
3 * Copyright (C) 2012 Imagination Technologies Ltd
5 * This code is based on the sh pmu code:
6 * Copyright (C) 2009 Paul Mundt
8 * and on the arm pmu code:
9 * Copyright (C) 2009 picoChip Designs, Ltd., James Iles
10 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
12 * This file is subject to the terms and conditions of the GNU General Public
13 * License. See the file "COPYING" in the main directory of this archive
17 #include <linux/atomic.h>
18 #include <linux/export.h>
19 #include <linux/init.h>
20 #include <linux/irqchip/metag.h>
21 #include <linux/perf_event.h>
22 #include <linux/slab.h>
24 #include <asm/core_reg.h>
27 #include <asm/processor.h>
29 #include "perf_event.h"
/*
 * File-scope state of the Meta PMU driver:
 *  - forward declarations of the per-event init and destroy helpers,
 *  - metag_pmu, the pointer to the active PMU description that the rest
 *    of this file reaches through metag_pmu->...,
 *  - cpu_hw_events, the per-CPU bookkeeping (events[], used_mask and
 *    pmu_lock are the members used in this file).
 */
31 static int _hw_perf_event_init(struct perf_event *);
32 static void _hw_perf_event_destroy(struct perf_event *);
34 /* Determines which core type we are */
35 static struct metag_pmu *metag_pmu __read_mostly;
37 /* Processor specific data */
38 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
/*
 * perf_pmu_name() - report the name of the active PMU description.
 *
 * Returns metag_pmu->name. Exported with EXPORT_SYMBOL_GPL.
 *
 * NOTE(review): the embedded source numbering jumps from 41 to 46, so the
 * lines in between are not visible in this excerpt; whether metag_pmu is
 * checked before the dereference cannot be confirmed from here.
 */
41 const char *perf_pmu_name(void)
46 return metag_pmu->name;
48 EXPORT_SYMBOL_GPL(perf_pmu_name);
/*
 * perf_num_counters() - report how many counters the active PMU offers.
 *
 * Returns metag_pmu->max_events. Exported with EXPORT_SYMBOL_GPL.
 *
 * NOTE(review): source lines 51-52 and 54-56 are not visible in this
 * excerpt; presumably they guard against an unset metag_pmu and supply a
 * fallback return value - confirm against the full file.
 */
50 int perf_num_counters(void)
53 return metag_pmu->max_events;
57 EXPORT_SYMBOL_GPL(perf_num_counters);
/*
 * metag_pmu_initialised() - predicate consulted by metag_pmu_event_init()
 * before any event is set up (a zero result takes the failure branch
 * there).
 *
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * tests whether metag_pmu has been assigned - confirm.
 */
59 static inline int metag_pmu_initialised(void)
/*
 * release_pmu_hardware() - give back the two counter overflow interrupts.
 *
 * Maps internal IRQ 17 and then 16 with internal_irq_map() and hands each
 * result to free_irq() with the dev cookies (void *)1 and (void *)0. These
 * are the same cookies reserve_pmu_hardware() passes to request_irq().
 * Called from _hw_perf_event_destroy() with reserve_mutex held.
 *
 * A version field is derived from metag_pmu->version masked with
 * METAC_ID_MINOR_BITS | METAC_ID_REV_BITS.
 *
 * NOTE(review): the shift amount, the declaration of irq and the test that
 * follows the early-cores comment are on lines not visible here.
 * Presumably cores without overflow interrupts return before the IRQs are
 * touched, and the mapping results are validated before free_irq() -
 * confirm against the full file.
 */
64 static void release_pmu_hardware(void)
67 unsigned int version = (metag_pmu->version &
68 (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
71 /* Early cores don't have overflow interrupts */
75 irq = internal_irq_map(17);
77 free_irq(irq, (void *)1);
79 irq = internal_irq_map(16);
81 free_irq(irq, (void *)0);
/*
 * reserve_pmu_hardware() - claim the two counter overflow interrupts.
 *
 * Visible sequence:
 *  1. map internal IRQ 16 into irq[0]; an error is logged for a failed
 *     mapping,
 *  2. request irq[0] with metag_pmu->handle_irq, IRQF_NOBALANCING, the
 *     name metagpmu0 and dev cookie (void *)0,
 *  3. repeat for internal IRQ 17 into irq[1] with the name metagpmu1 and
 *     dev cookie (void *)1,
 *  4. free_irq(irq[0], (void *)0) near the end, which looks like the
 *     unwind step for a failure on the second interrupt.
 *
 * The dev cookie is the counter index; metag_pmu_counter_overflow() uses
 * it to find the event in cpuhw->events[].
 *
 * Returns an int status that metag_pmu_event_init() stores in err.
 *
 * NOTE(review): the conditions guarding each pr_err(), the labels and the
 * return statements are on lines not visible in this excerpt, as is the
 * test following the early-cores comment. Presumably cores without
 * overflow interrupts return success without requesting anything -
 * confirm.
 */
84 static int reserve_pmu_hardware(void)
87 unsigned int version = (metag_pmu->version &
88 (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
91 /* Early cores don't have overflow interrupts */
96 * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
97 * similarly, 17 is the interrupt for performance counter 1.
98 * We can't (yet) interrupt on the cycle counter, because it's a
99 * register, however it holds a 32-bit value as opposed to 24-bit.
101 irq[0] = internal_irq_map(16);
103 pr_err("unable to map internal IRQ %d\n", 16);
106 err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
107 "metagpmu0", (void *)0);
109 pr_err("unable to request IRQ%d for metag PMU counters\n",
114 irq[1] = internal_irq_map(17);
116 pr_err("unable to map internal IRQ %d\n", 17);
119 err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
120 "metagpmu1", (void *)1);
122 pr_err("unable to request IRQ%d for metag PMU counters\n",
130 free_irq(irq[0], (void *)0);
/*
 * metag_pmu_enable() - installed as the .pmu_enable callback of the pmu
 * structure in this file.
 *
 * NOTE(review): the body is not visible in this excerpt.
 */
136 static void metag_pmu_enable(struct pmu *pmu)
/*
 * metag_pmu_disable() - installed as the .pmu_disable callback of the pmu
 * structure in this file.
 *
 * NOTE(review): the body is not visible in this excerpt.
 */
140 static void metag_pmu_disable(struct pmu *pmu)
/*
 * metag_pmu_event_init() - .event_init callback of the Meta PMU.
 *
 * Visible steps:
 *  - take the failure branch when metag_pmu_initialised() reports false,
 *  - take a separate branch for events with a branch stack,
 *  - install _hw_perf_event_destroy() as event->destroy,
 *  - take a reference on metag_pmu->active_events; when the count is
 *    still zero this happens under reserve_mutex and the PMU interrupts
 *    are claimed through reserve_pmu_hardware() first,
 *  - for PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE (further case labels
 *    may sit on lines not visible) delegate to _hw_perf_event_init(),
 *  - call event->destroy(event) near the end, which looks like the error
 *    path that drops the reference again.
 *
 * NOTE(review): active_events is derived from metag_pmu before the
 * metag_pmu_initialised() check runs; worth confirming this is harmless
 * when no PMU was detected.
 * NOTE(review): the error codes, the condition in front of atomic_inc()
 * and of event->destroy(), and the default case of the switch are on
 * lines not visible here. Confirm that every path which reaches
 * event->destroy() (or returns an error without it) leaves
 * active_events balanced.
 */
144 static int metag_pmu_event_init(struct perf_event *event)
147 atomic_t *active_events = &metag_pmu->active_events;
149 if (!metag_pmu_initialised()) {
154 if (has_branch_stack(event))
157 event->destroy = _hw_perf_event_destroy;
/* Fast path: bump the count unless it is zero; otherwise serialise. */
159 if (!atomic_inc_not_zero(active_events)) {
160 mutex_lock(&metag_pmu->reserve_mutex);
/* Re-check under the mutex: another opener may have won the race. */
161 if (atomic_read(active_events) == 0)
162 err = reserve_pmu_hardware();
165 atomic_inc(active_events);
167 mutex_unlock(&metag_pmu->reserve_mutex);
170 /* Hardware and caches counters */
171 switch (event->attr.type) {
172 case PERF_TYPE_HARDWARE:
173 case PERF_TYPE_HW_CACHE:
175 err = _hw_perf_event_init(event);
183 event->destroy(event);
/*
 * metag_pmu_event_update() - fold the hardware delta of counter idx into
 * the generic perf count of an event.
 *
 * @event: event whose count is advanced
 * @hwc:   hardware state of that event (prev_count, period_left)
 * @idx:   counter index handed to metag_pmu->read()
 *
 * Reads hwc->prev_count, samples the hardware through metag_pmu->read()
 * and publishes the new raw value with local64_cmpxchg(). The difference
 * (new - prev) is masked with MAX_PERIOD before it is added to
 * event->count and subtracted from hwc->period_left.
 *
 * NOTE(review): the declaration of delta and the statement executed when
 * the cmpxchg observes a changed prev_count are on lines not visible
 * here; presumably the read is retried - confirm.
 */
189 void metag_pmu_event_update(struct perf_event *event,
190 struct hw_perf_event *hwc, int idx)
192 u64 prev_raw_count, new_raw_count;
196 * If this counter is chained, it may be that the previous counter
197 * value has been changed beneath us.
199 * To get around this, we read and exchange the new raw count, then
200 * add the delta (new - prev) to the generic counter atomically.
202 * Without interrupts, this is the simplest approach.
205 prev_raw_count = local64_read(&hwc->prev_count);
206 new_raw_count = metag_pmu->read(idx);
208 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
209 new_raw_count) != prev_raw_count)
213 * Calculate the delta and add it to the counter.
215 delta = (new_raw_count - prev_raw_count) & MAX_PERIOD;
217 local64_add(delta, &event->count);
218 local64_sub(delta, &hwc->period_left);
/*
 * metag_pmu_event_set_period() - program counter idx so that it overflows
 * after the remaining part of the sample period.
 *
 * @event: event being (re)armed
 * @hwc:   hardware state of that event
 * @idx:   counter index handed to metag_pmu->write()
 *
 * Visible steps:
 *  - start from hwc->period_left and compensate when sample_period
 *    differs from last_period,
 *  - two unlikely branches (left <= -period and left <= 0) store a fresh
 *    period_left and record last_period,
 *  - clamp left to metag_pmu->max_period,
 *  - when a write callback exists, set prev_count to -(s32)left and write
 *    (-left & MAX_PERIOD) to the counter, so it counts up towards the
 *    overflow point,
 *  - refresh the user page.
 *
 * NOTE(review): the new value assigned to left inside the two unlikely
 * branches, and the value returned, are on lines not visible here.
 */
221 int metag_pmu_event_set_period(struct perf_event *event,
222 struct hw_perf_event *hwc, int idx)
224 s64 left = local64_read(&hwc->period_left);
225 s64 period = hwc->sample_period;
228 /* The period may have been changed */
229 if (unlikely(period != hwc->last_period))
230 left += period - hwc->last_period;
232 if (unlikely(left <= -period)) {
234 local64_set(&hwc->period_left, left);
235 hwc->last_period = period;
239 if (unlikely(left <= 0)) {
241 local64_set(&hwc->period_left, left);
242 hwc->last_period = period;
246 if (left > (s64)metag_pmu->max_period)
247 left = metag_pmu->max_period;
249 if (metag_pmu->write) {
250 local64_set(&hwc->prev_count, -(s32)left);
251 metag_pmu->write(idx, -left & MAX_PERIOD);
254 perf_event_update_userpage(event);
/*
 * metag_pmu_start() - .start callback: arm and enable the counter that
 * was assigned to an event.
 *
 * @event: event to start
 * @flags: PERF_EF_RELOAD is only used for a sanity warning, since the
 *         period is always reprogrammed
 *
 * Warns once when idx is -1, and warns once when PERF_EF_RELOAD is passed
 * while hwc->state lacks PERF_HES_UPTODATE. When metag_pmu->max_period is
 * non-zero the period is reprogrammed through
 * metag_pmu_event_set_period(). The event is then recorded in
 * cpuc->events[idx] and the counter is enabled via metag_pmu->enable().
 *
 * NOTE(review): the declaration of idx (presumably hwc->idx), the action
 * after the idx warning and any update of hwc->state are on lines not
 * visible here.
 */
259 static void metag_pmu_start(struct perf_event *event, int flags)
261 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
262 struct hw_perf_event *hwc = &event->hw;
265 if (WARN_ON_ONCE(idx == -1))
269 * We always have to reprogram the period, so ignore PERF_EF_RELOAD.
271 if (flags & PERF_EF_RELOAD)
272 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
278 * Some counters can't be stopped (i.e. are core global), so when the
279 * counter was 'stopped' we merely disabled the IRQ. If we don't reset
280 * the period, then we'll either: a) get an overflow too soon;
281 * or b) too late if the overflow happened since disabling.
282 * Obviously, this has little bearing on cores without the overflow
283 * interrupt, as the performance counter resets to zero on write
286 if (metag_pmu->max_period)
287 metag_pmu_event_set_period(event, hwc, hwc->idx);
288 cpuc->events[idx] = event;
289 metag_pmu->enable(hwc, idx);
/*
 * metag_pmu_stop() - .stop callback: bring the count up to date and
 * disable the counter.
 *
 * @event: event to stop
 * @flags: not referenced in the visible lines
 *
 * Does nothing when hwc->state already carries PERF_HES_STOPPED.
 * Otherwise the count is refreshed with metag_pmu_event_update() before
 * metag_pmu->disable() runs, and the state is then marked both
 * PERF_HES_STOPPED and PERF_HES_UPTODATE.
 */
292 static void metag_pmu_stop(struct perf_event *event, int flags)
294 struct hw_perf_event *hwc = &event->hw;
297 * We should always update the counter on stop; see comment above
300 if (!(hwc->state & PERF_HES_STOPPED)) {
/* Update first: see the ordering note in metag_pmu_disable_counter(). */
301 metag_pmu_event_update(event, hwc, hwc->idx);
302 metag_pmu->disable(hwc, hwc->idx);
303 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
/*
 * metag_pmu_add() - .add callback: pick a hardware counter for an event
 * on the current CPU.
 *
 * @event: event to place
 * @flags: PERF_EF_START requests an immediate start
 *
 * Runs between perf_pmu_disable() and perf_pmu_enable(). An event whose
 * config is 0x100 (the instruction count encoding, see
 * metag_general_events) claims the dedicated METAG_INST_COUNTER bit in
 * cpuc->used_mask. Any other event takes the first free bit below
 * METAG_INST_COUNTER. The chosen counter is disabled, the state is set to
 * STOPPED | UPTODATE, and the event is started with PERF_EF_RELOAD when
 * PERF_EF_START was requested. The user page is refreshed afterwards.
 *
 * NOTE(review): the width passed to find_first_zero_bit() is the global
 * active_events count rather than a number of counters; the result is
 * only bounded by the later comparison with METAG_INST_COUNTER. Confirm
 * this is intended.
 * NOTE(review): the failure branches, the store of idx into hwc, the
 * exit label and the return statement are on lines not visible here.
 */
307 static int metag_pmu_add(struct perf_event *event, int flags)
309 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
310 struct hw_perf_event *hwc = &event->hw;
311 int idx = 0, ret = 0;
313 perf_pmu_disable(event->pmu);
315 /* check whether we're counting instructions */
316 if (hwc->config == 0x100) {
317 if (__test_and_set_bit(METAG_INST_COUNTER,
322 idx = METAG_INST_COUNTER;
324 /* Check whether we have a spare counter */
325 idx = find_first_zero_bit(cpuc->used_mask,
326 atomic_read(&metag_pmu->active_events));
327 if (idx >= METAG_INST_COUNTER) {
332 __set_bit(idx, cpuc->used_mask);
336 /* Make sure the counter is disabled */
337 metag_pmu->disable(hwc, idx);
339 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
340 if (flags & PERF_EF_START)
341 metag_pmu_start(event, PERF_EF_RELOAD);
343 perf_event_update_userpage(event);
345 perf_pmu_enable(event->pmu);
/*
 * metag_pmu_del() - .del callback: detach an event from its counter on
 * the current CPU.
 *
 * @event: event to remove
 * @flags: not referenced in the visible lines
 *
 * Stops the event with PERF_EF_UPDATE, clears its slot in cpuc->events[],
 * releases its bit in cpuc->used_mask and refreshes the user page.
 *
 * NOTE(review): the declaration of idx (presumably hwc->idx) and any
 * sanity check on it are on lines not visible here.
 */
349 static void metag_pmu_del(struct perf_event *event, int flags)
351 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
352 struct hw_perf_event *hwc = &event->hw;
356 metag_pmu_stop(event, PERF_EF_UPDATE);
357 cpuc->events[idx] = NULL;
358 __clear_bit(idx, cpuc->used_mask);
360 perf_event_update_userpage(event);
/*
 * metag_pmu_read() - .read callback: refresh event->count from the
 * hardware through metag_pmu_event_update().
 *
 * NOTE(review): the guard announced by the existing comment about
 * disabled counters is on lines not visible in this excerpt.
 */
363 static void metag_pmu_read(struct perf_event *event)
365 struct hw_perf_event *hwc = &event->hw;
367 /* Don't read disabled counters! */
371 metag_pmu_event_update(event, hwc, hwc->idx);
/*
 * Callback table handed to the perf core. init_hw_perf_events() copies it
 * into metag_pmu->pmu and registers it with perf_pmu_register().
 * Each member points at the function of the same role defined earlier in
 * this file.
 */
374 static struct pmu pmu = {
375 .pmu_enable = metag_pmu_enable,
376 .pmu_disable = metag_pmu_disable,
378 .event_init = metag_pmu_event_init,
380 .add = metag_pmu_add,
381 .del = metag_pmu_del,
382 .start = metag_pmu_start,
383 .stop = metag_pmu_stop,
384 .read = metag_pmu_read,
/*
 * Translation from generic PERF_COUNT_HW_* ids to Meta event encodings,
 * indexed through metag_pmu_event_map(). Only CPU cycles (0x03) and
 * instructions (0x100) carry an encoding; 0x100 is the value that
 * metag_pmu_add() and metag_pmu_enable_counter() treat as the dedicated
 * instruction counter. Every other entry is -1.
 *
 * NOTE(review): -1 presumably marks an unsupported event that
 * _hw_perf_event_init() rejects; the rejecting test is not visible in
 * this excerpt.
 */
387 /* Core counter specific functions */
388 static const int metag_general_events[] = {
389 [PERF_COUNT_HW_CPU_CYCLES] = 0x03,
390 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
391 [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
392 [PERF_COUNT_HW_CACHE_MISSES] = -1,
393 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
394 [PERF_COUNT_HW_BRANCH_MISSES] = -1,
395 [PERF_COUNT_HW_BUS_CYCLES] = -1,
396 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
397 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
398 [PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
/*
 * Cache event encodings, indexed [cache type][operation][result].
 * _hw_perf_cache_event() looks entries up through
 * metag_pmu->cache_events. Entries set to CACHE_OP_UNSUPPORTED have no
 * Meta encoding; the remaining entries hold the raw event number.
 *
 * NOTE(review): the designators naming each cache type and operation are
 * on lines not visible in this excerpt, so which cache a given group of
 * ACCESS/MISS pairs belongs to cannot be read off here.
 */
401 static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
404 [C(RESULT_ACCESS)] = 0x08,
405 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
408 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
409 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
412 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
413 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
418 [C(RESULT_ACCESS)] = 0x09,
419 [C(RESULT_MISS)] = 0x0a,
422 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
423 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
426 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
427 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
432 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
433 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
436 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
437 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
440 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
441 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
446 [C(RESULT_ACCESS)] = 0xd0,
447 [C(RESULT_MISS)] = 0xd2,
450 [C(RESULT_ACCESS)] = 0xd4,
451 [C(RESULT_MISS)] = 0xd5,
454 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
455 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
460 [C(RESULT_ACCESS)] = 0xd1,
461 [C(RESULT_MISS)] = 0xd3,
464 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
465 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
468 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
469 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
474 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
475 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
478 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
479 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
482 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
483 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
488 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
489 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
492 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
493 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
496 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
497 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
/*
 * _hw_perf_event_destroy() - event->destroy hook installed by
 * metag_pmu_event_init().
 *
 * Drops one reference on metag_pmu->active_events. The branch is entered
 * with reserve_mutex held only when atomic_dec_and_mutex_lock() reports
 * that the count reached zero; the PMU interrupts are then released and
 * the mutex is dropped. The event argument itself is not referenced in
 * the visible lines.
 */
503 static void _hw_perf_event_destroy(struct perf_event *event)
505 atomic_t *active_events = &metag_pmu->active_events;
506 struct mutex *pmu_mutex = &metag_pmu->reserve_mutex;
/* Last user gone: give the overflow IRQs back under the mutex. */
508 if (atomic_dec_and_mutex_lock(active_events, pmu_mutex)) {
509 release_pmu_hardware();
510 mutex_unlock(pmu_mutex);
/*
 * _hw_perf_cache_event() - decode a PERF_TYPE_HW_CACHE config value and
 * look up its Meta encoding.
 *
 * @config: packed value; bits 0-7 hold the cache type, bits 8-15 the
 *          operation and bits 16-23 the result
 * @evp:    output location for the encoding (the store itself is on a
 *          line not visible here)
 *
 * Takes a failure branch when the PMU has no cache_events table or when
 * any of the three decoded fields is outside its PERF_COUNT_HW_CACHE_*
 * bound, then reads the entry from (*metag_pmu->cache_events).
 *
 * NOTE(review): the error codes, the handling of an unsupported entry,
 * the declaration of ev and the return statements are on lines not
 * visible in this excerpt.
 */
514 static int _hw_perf_cache_event(int config, int *evp)
516 unsigned long type, op, result;
519 if (!metag_pmu->cache_events)
523 type = config & 0xff;
524 op = (config >> 8) & 0xff;
525 result = (config >> 16) & 0xff;
/* Reject fields that would index outside the table. */
527 if (type >= PERF_COUNT_HW_CACHE_MAX ||
528 op >= PERF_COUNT_HW_CACHE_OP_MAX ||
529 result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
532 ev = (*metag_pmu->cache_events)[type][op][result];
/*
 * _hw_perf_event_init() - translate the attributes of an event into a
 * hardware configuration. Called from metag_pmu_event_init().
 *
 * Mapping by attr->type:
 *  - PERF_TYPE_HARDWARE: config is compared with PERF_COUNT_HW_MAX and
 *    then translated through metag_pmu->event_map(),
 *  - PERF_TYPE_HW_CACHE: decoded through _hw_perf_cache_event(),
 *  - a further branch uses attr->config directly as the mapping.
 *
 * The mapping is OR-ed into hwc->config. When the PMU has a non-zero
 * max_period and the event has no sample_period, half of max_period is
 * used as the period and also stored in last_period and period_left.
 *
 * NOTE(review): the case label for the direct mapping branch, the test
 * that rejects unsupported events, the assignment of the initial counter
 * index and the return statements are on lines not visible here.
 */
541 static int _hw_perf_event_init(struct perf_event *event)
543 struct perf_event_attr *attr = &event->attr;
544 struct hw_perf_event *hwc = &event->hw;
545 int mapping = 0, err;
547 switch (attr->type) {
548 case PERF_TYPE_HARDWARE:
/* Bound check before metag_pmu_event_map() indexes its table. */
549 if (attr->config >= PERF_COUNT_HW_MAX)
552 mapping = metag_pmu->event_map(attr->config);
555 case PERF_TYPE_HW_CACHE:
556 err = _hw_perf_cache_event(attr->config, &mapping);
562 mapping = attr->config;
566 /* Return early if the event is unsupported */
571 * Don't assign an index until the event is placed into the hardware.
572 * -1 signifies that we're still deciding where to put it. On SMP
573 * systems each core has its own set of counters, so we can't do any
574 * constraint checking yet.
578 /* Store the event encoding */
579 hwc->config |= (unsigned long)mapping;
582 * For non-sampling runs, limit the sample_period to half of the
583 * counter width. This way, the new counter value should be less
584 * likely to overtake the previous one (unless there are IRQ latency
587 if (metag_pmu->max_period) {
588 if (!hwc->sample_period) {
589 hwc->sample_period = metag_pmu->max_period >> 1;
590 hwc->last_period = hwc->sample_period;
591 local64_set(&hwc->period_left, hwc->sample_period);
/*
 * metag_pmu_enable_counter() - enable callback of the Meta 2 PMU: program
 * counter idx for the event encoding in event->config.
 *
 * @event: hardware state of the perf event (note: a hw_perf_event)
 * @idx:   counter to program
 *
 * Runs under events->pmu_lock with interrupts saved.
 *  - For METAG_INST_COUNTER: warn once if config is not 0x100 and seed
 *    prev_count from the TXTACTCYC core register.
 *  - Otherwise: write the low nibble of config to either PERF_ICORE(idx)
 *    or PERF_CHAN(idx), then build the PERF_COUNT(idx) value from
 *    (config & 0xf) in bits 28-31 and a per-thread bit, (1 << 24) shifted
 *    by hard_processor_id().
 *  - When max_period is non-zero the low 24 bits currently in the
 *    register are carried over; the other visible path resets prev_count
 *    to 0.
 *
 * NOTE(review): the tests choosing between PERF_ICORE and PERF_CHAN, any
 * modification of config between the two writes, the else keyword in
 * front of the prev_count reset and the jump taken after the instruction
 * counter case are on lines not visible in this excerpt.
 */
598 static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
600 struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events);
601 unsigned int config = event->config;
602 unsigned int tmp = config & 0xf0;
605 raw_spin_lock_irqsave(&events->pmu_lock, flags);
608 * Check if we're enabling the instruction counter (index of
611 if (METAG_INST_COUNTER == idx) {
612 WARN_ONCE((config != 0x100),
613 "invalid configuration (%d) for counter (%d)\n",
615 local64_set(&event->prev_count, __core_reg_get(TXTACTCYC));
619 /* Check for a core internal or performance channel event. */
624 * Anything other than a cycle count will write the low-
625 * nibble to the correct counter register.
629 perf_addr = (void *)PERF_ICORE(idx);
633 perf_addr = (void *)PERF_CHAN(idx);
642 metag_out32((config & 0x0f), perf_addr);
645 * Now we use the high nibble as the performance event to
651 tmp = ((config & 0xf) << 28) |
652 ((1 << 24) << hard_processor_id());
653 if (metag_pmu->max_period)
655 * Cores supporting overflow interrupts may have had the counter
656 * set to a specific value that needs preserving.
658 tmp |= metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
661 * Older cores reset the counter on write, so prev_count needs
662 * resetting too so we can calculate a correct delta.
664 local64_set(&event->prev_count, 0);
666 metag_out32(tmp, PERF_COUNT(idx));
668 raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
/*
 * metag_pmu_disable_counter() - disable callback of the Meta 2 PMU.
 *
 * @event: hardware state of the perf event (not referenced in the
 *         visible lines)
 * @idx:   counter to disable
 *
 * METAG_INST_COUNTER is special-cased up front. For the other counters
 * the PERF_COUNT(idx) register is read and written back under
 * events->pmu_lock with interrupts saved.
 *
 * NOTE(review): the statement executed for METAG_INST_COUNTER (presumably
 * a plain return) and the modification applied to tmp between the read
 * and the write-back are on lines not visible here. According to the
 * existing comment the write-back strips the thread id and event nibble
 * - confirm the mask against the full file.
 */
671 static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
673 struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events);
674 unsigned int tmp = 0;
678 * The cycle counter can't be disabled per se, as it's a hardware
679 * thread register which is always counting. We merely return if this
680 * is the counter we're attempting to disable.
682 if (METAG_INST_COUNTER == idx)
686 * The counter value _should_ have been read prior to disabling,
687 * as if we're running on an early core then the value gets reset to
688 * 0, and any read after that would be useless. On the newer cores,
689 * however, it's better to read-modify-update this for purposes of
690 * the overflow interrupt.
691 * Here we remove the thread id AND the event nibble (there are at
692 * least two events that count events that are core global and ignore
693 * the thread id mask). This only works because we don't mix thread
694 * performance counts, and event 0x00 requires a thread id mask!
696 raw_spin_lock_irqsave(&events->pmu_lock, flags);
698 tmp = metag_in32(PERF_COUNT(idx));
700 metag_out32(tmp, PERF_COUNT(idx));
702 raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
/*
 * metag_pmu_read_counter() - read callback of the Meta 2 PMU: fetch the
 * raw value of counter idx.
 *
 * METAG_INST_COUNTER is read from the TXTACTCYC core register; any other
 * counter is read from PERF_COUNT(idx) and masked to its low 24 bits.
 *
 * NOTE(review): the declaration of tmp and the return statement are on
 * lines not visible in this excerpt.
 */
705 static u64 metag_pmu_read_counter(int idx)
709 if (METAG_INST_COUNTER == idx) {
710 tmp = __core_reg_get(TXTACTCYC);
714 tmp = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
/*
 * metag_pmu_write_counter() - write callback of the Meta 2 PMU: load a
 * new count into counter idx.
 *
 * @idx: counter to write
 * @val: new counter value
 *
 * METAG_INST_COUNTER is special-cased up front. For the other counters
 * the top 8 bits of PERF_COUNT(idx) are sampled into tmp and val is
 * written to the register, all under events->pmu_lock with interrupts
 * saved.
 *
 * NOTE(review): the statement executed for METAG_INST_COUNTER, the
 * masking of val to 24 bits and the merge of tmp into val are on lines
 * not visible here. The existing comment says thread mask and event id
 * are kept, so presumably val is combined with tmp before the write -
 * confirm.
 */
719 static void metag_pmu_write_counter(int idx, u32 val)
721 struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events);
726 * This _shouldn't_ happen, but if it does, then we can just
727 * ignore the write, as the register is read-only and clear-on-write.
729 if (METAG_INST_COUNTER == idx)
733 * We'll keep the thread mask and event id, and just update the
734 * counter itself. Also , we should bound the value to 24-bits.
736 raw_spin_lock_irqsave(&events->pmu_lock, flags);
739 tmp = metag_in32(PERF_COUNT(idx)) & 0xff000000;
741 metag_out32(val, PERF_COUNT(idx));
743 raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
/*
 * metag_pmu_event_map() - event_map callback of the Meta 2 PMU.
 *
 * @idx: generic PERF_COUNT_HW_* event id
 *
 * Returns the entry of metag_general_events for idx (-1 for events with
 * no Meta encoding). No range check is done here; the visible caller,
 * _hw_perf_event_init(), compares the id with PERF_COUNT_HW_MAX before
 * calling.
 */
746 static int metag_pmu_event_map(int idx)
748 return metag_general_events[idx];
/*
 * metag_pmu_counter_overflow() - overflow interrupt handler, installed
 * through metag_pmu->handle_irq by reserve_pmu_hardware().
 *
 * @irq: interrupt number (not referenced in the visible lines)
 * @dev: cookie given to request_irq(); reserve_pmu_hardware() passes
 *       (void *)0 and (void *)1 for the two counters
 *
 * Visible steps:
 *  1. under __global_lock2(), rewrite PERF_COUNT(idx) with only its low
 *     24 bits, clearing the upper byte while keeping the count,
 *  2. fold the delta into the event via metag_pmu_event_update(),
 *     initialise the sample data with hwc->last_period and re-arm the
 *     period,
 *  3. if perf_event_overflow() returns zero, restore the saved upper byte
 *     on top of the current low 24 bits, again under __global_lock2().
 *
 * NOTE(review): the derivation of idx from dev, the declarations of
 * counter and flags and the return value are on lines not visible here.
 * NOTE(review): hwc is computed from cpuhw->events[idx] without a visible
 * NULL test - confirm an overflow cannot arrive for an empty slot.
 */
751 static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
754 struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
755 struct perf_event *event = cpuhw->events[idx];
756 struct hw_perf_event *hwc = &event->hw;
757 struct pt_regs *regs = get_irq_regs();
758 struct perf_sample_data sampledata;
763 * We need to stop the core temporarily from generating another
764 * interrupt while we disable this counter. However, we don't want
765 * to flag the counter as free
767 __global_lock2(flags);
768 counter = metag_in32(PERF_COUNT(idx));
769 metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
770 __global_unlock2(flags);
772 /* Update the counts and reset the sample period */
773 metag_pmu_event_update(event, hwc, idx);
774 perf_sample_data_init(&sampledata, 0, hwc->last_period);
775 metag_pmu_event_set_period(event, hwc, idx);
778 * Enable the counter again once core overflow processing has
779 * completed. Note the counter value may have been modified while it was
780 * inactive to set it up ready for the next interrupt.
782 if (!perf_event_overflow(event, &sampledata, regs)) {
783 __global_lock2(flags);
784 counter = (counter & 0xff000000) |
785 (metag_in32(PERF_COUNT(idx)) & 0x00ffffff);
786 metag_out32(counter, PERF_COUNT(idx));
787 __global_unlock2(flags);
/*
 * Description of the Meta 2 PMU. init_hw_perf_events() points metag_pmu
 * at this object. For cores whose minor/revision field is below 0x0104
 * it then clears handle_irq and write and sets max_period to 0.
 */
793 static struct metag_pmu _metag_pmu = {
794 .handle_irq = metag_pmu_counter_overflow,
795 .enable = metag_pmu_enable_counter,
796 .disable = metag_pmu_disable_counter,
797 .read = metag_pmu_read_counter,
798 .write = metag_pmu_write_counter,
799 .event_map = metag_pmu_event_map,
800 .cache_events = &metag_pmu_cache_events,
801 .max_period = MAX_PERIOD,
802 .max_events = MAX_HWEVENTS,
/*
 * metag_pmu_cpu_notify() - CPU hotplug callback registered through
 * metag_pmu_notifier.
 *
 * Only CPU_STARTING (with CPU_TASKS_FROZEN masked off) is acted on: the
 * per-CPU cpu_hw_events of that CPU is zeroed and its pmu_lock is
 * initialised. The CPU number arrives encoded in the hcpu pointer.
 *
 * NOTE(review): the remaining parameter line and both return statements
 * are on lines not visible in this excerpt.
 */
805 /* PMU CPU hotplug notifier */
806 static int metag_pmu_cpu_notify(struct notifier_block *b, unsigned long action,
809 unsigned int cpu = (unsigned int)hcpu;
810 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
812 if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
815 memset(cpuc, 0, sizeof(struct cpu_hw_events));
816 raw_spin_lock_init(&cpuc->pmu_lock);
/*
 * Notifier block that routes CPU hotplug events to
 * metag_pmu_cpu_notify(); registered in init_hw_perf_events() with
 * register_cpu_notifier().
 */
821 static struct notifier_block metag_pmu_notifier = {
822 .notifier_call = metag_pmu_cpu_notify,
/*
 * init_hw_perf_events() - detect the core and register the PMU; run as an
 * early_initcall.
 *
 * Visible steps:
 *  - read the core id word at METAC_ID and split it into the major field
 *    and the combined minor/revision field,
 *  - log that no counter support is available on the unsupported branch,
 *  - for major version 0x02 select _metag_pmu; when the minor/revision
 *    field is below 0x0104 clear handle_irq and write and set max_period
 *    to 0,
 *  - record name and version, copy the pmu callback table into
 *    metag_pmu->pmu and log the counter count,
 *  - set PERF_PMU_CAP_NO_INTERRUPT on metag_pmu->pmu when max_period
 *    is 0,
 *  - reset active_events and reserve_mutex, clear PERF_COUNT(0) and
 *    PERF_COUNT(1), zero the per-CPU state of every possible CPU and
 *    initialise each pmu_lock,
 *  - register the hotplug notifier and then the PMU as PERF_TYPE_RAW.
 *
 * NOTE(review): perf_pmu_register() is passed the file-scope pmu object,
 * while the capability flag is set on the copy held in metag_pmu->pmu;
 * confirm the flag reaches the registered structure.
 * NOTE(review): the condition of the unsupported branch, the shift of
 * min_rev, the declarations of ret and cpu and the return statements are
 * on lines not visible in this excerpt.
 */
825 /* PMU Initialisation */
826 static int __init init_hw_perf_events(void)
829 u32 version = *(u32 *)METAC_ID;
830 int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
831 int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
834 /* Not a Meta 2 core, then not supported */
836 pr_info("no hardware counter support available\n");
838 } else if (0x02 == major) {
839 metag_pmu = &_metag_pmu;
841 if (min_rev < 0x0104) {
843 * A core without overflow interrupts, and clear-on-
846 metag_pmu->handle_irq = NULL;
847 metag_pmu->write = NULL;
848 metag_pmu->max_period = 0;
851 metag_pmu->name = "meta2";
852 metag_pmu->version = version;
853 metag_pmu->pmu = pmu;
856 pr_info("enabled with %s PMU driver, %d counters available\n",
857 metag_pmu->name, metag_pmu->max_events);
860 * Early cores have "limited" counters - they have no overflow
861 * interrupts - and so are unable to do sampling without extra work
862 * and timer assistance.
864 if (metag_pmu->max_period == 0) {
865 metag_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
868 /* Initialise the active events and reservation mutex */
869 atomic_set(&metag_pmu->active_events, 0);
870 mutex_init(&metag_pmu->reserve_mutex);
872 /* Clear the counters */
873 metag_out32(0, PERF_COUNT(0));
874 metag_out32(0, PERF_COUNT(1));
876 for_each_possible_cpu(cpu) {
877 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
879 memset(cpuc, 0, sizeof(struct cpu_hw_events));
880 raw_spin_lock_init(&cpuc->pmu_lock);
883 register_cpu_notifier(&metag_pmu_notifier);
884 ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW);
888 early_initcall(init_hw_perf_events);