/*
 * Performance counter support - powerpc architecture code
 *
 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_counter.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/reg.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
struct cpu_hw_counters {
        int n_counters;
        int n_percpu;
        int disabled;
        int n_added;
        struct perf_counter *counter[MAX_HWCOUNTERS];
        unsigned int events[MAX_HWCOUNTERS];
        u64 mmcr[3];
        u8 pmcs_enabled;
};
DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);

struct power_pmu *ppmu;
void perf_counter_print_debug(void)
{
}
/*
 * Read one performance monitor counter (PMC).
 */
static unsigned long read_pmc(int idx)
{
        unsigned long val;

        switch (idx) {
        case 1: val = mfspr(SPRN_PMC1); break;
        case 2: val = mfspr(SPRN_PMC2); break;
        case 3: val = mfspr(SPRN_PMC3); break;
        case 4: val = mfspr(SPRN_PMC4); break;
        case 5: val = mfspr(SPRN_PMC5); break;
        case 6: val = mfspr(SPRN_PMC6); break;
        case 7: val = mfspr(SPRN_PMC7); break;
        case 8: val = mfspr(SPRN_PMC8); break;
        default:
                printk(KERN_ERR "oops trying to read PMC%d\n", idx);
                val = 0;
        }
        return val;
}
/*
 * Write one PMC.
 */
static void write_pmc(int idx, unsigned long val)
{
        switch (idx) {
        case 1: mtspr(SPRN_PMC1, val); break;
        case 2: mtspr(SPRN_PMC2, val); break;
        case 3: mtspr(SPRN_PMC3, val); break;
        case 4: mtspr(SPRN_PMC4, val); break;
        case 5: mtspr(SPRN_PMC5, val); break;
        case 6: mtspr(SPRN_PMC6, val); break;
        case 7: mtspr(SPRN_PMC7, val); break;
        case 8: mtspr(SPRN_PMC8, val); break;
        default:
                printk(KERN_ERR "oops trying to write PMC%d\n", idx);
        }
}
/*
 * Check if a set of events can all go on the PMU at once.
 * If they can't, this will look at alternative codes for the events
 * and see if any combination of alternative codes is feasible.
 * The feasible set is returned in event[].
 */
static int power_check_constraints(unsigned int event[], int n_ev)
{
        u64 mask, value, nv;
        unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
        u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
        u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
        u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
        int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
        int i, j;
        u64 addf = ppmu->add_fields;
        u64 tadd = ppmu->test_adder;

        if (n_ev > ppmu->n_counter)
                return -1;
        /* First see if the events will go on as-is */
        for (i = 0; i < n_ev; ++i) {
                alternatives[i][0] = event[i];
                if (ppmu->get_constraint(event[i], &amasks[i][0],
                                         &avalues[i][0]))
                        return -1;
                choice[i] = 0;
        }
        value = mask = 0;
        for (i = 0; i < n_ev; ++i) {
                nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
                if ((((nv + tadd) ^ value) & mask) != 0 ||
                    (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
                        break;
                value = nv;
                mask |= amasks[i][0];
        }
        if (i == n_ev)
                return 0;       /* all OK */
        /* doesn't work, gather alternatives... */
        if (!ppmu->get_alternatives)
                return -1;
        for (i = 0; i < n_ev; ++i) {
                n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
                for (j = 1; j < n_alt[i]; ++j)
                        ppmu->get_constraint(alternatives[i][j],
                                             &amasks[i][j], &avalues[i][j]);
        }
        /* enumerate all possibilities and see if any will work */
        i = 0;
        j = -1;
        value = mask = nv = 0;
        while (1) {
                if (j >= 0) {
                        /* we're backtracking, restore context */
                        value = svalues[i];
                        mask = smasks[i];
                        j = choice[i];
                }
                /*
                 * See if any alternative k for event i,
                 * where k > j, will satisfy the constraints.
                 */
                while (++j < n_alt[i]) {
                        nv = (value | avalues[i][j]) +
                                (value & avalues[i][j] & addf);
                        if ((((nv + tadd) ^ value) & mask) == 0 &&
                            (((nv + tadd) ^ avalues[i][j])
                             & amasks[i][j]) == 0)
                                break;
                }
                if (j >= n_alt[i]) {
                        /*
                         * No feasible alternative, backtrack
                         * to event i-1 and continue enumerating its
                         * alternatives from where we got up to.
                         */
                        if (--i < 0)
                                return -1;
                } else {
                        /*
                         * Found a feasible alternative for event i,
                         * remember where we got up to with this event,
                         * go on to the next event, and start with
                         * the first alternative for it.
                         */
                        choice[i] = j;
                        svalues[i] = value;
                        smasks[i] = mask;
                        value = nv;
                        mask |= amasks[i][j];
                        if (++i == n_ev)
                                break;
                        j = -1;
                }
        }

        /* OK, we have a feasible combination, tell the caller the solution */
        for (i = 0; i < n_ev; ++i)
                event[i] = alternatives[i][choice[i]];
        return 0;
}
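/*
 * Illustrative sketch, not part of the original code: shows how the
 * (mask, value) constraint encoding together with ppmu->add_fields and
 * ppmu->test_adder can express "at most two events may use resource X".
 * The field layout and every constant below are invented for the
 * example; real layouts come from the CPU-specific struct power_pmu
 * descriptions (ppc970_pmu, power6_pmu).  Give X a hypothetical 3-bit
 * counter field at bits 0-2 of the constraint word: each event adds 1
 * in that field, and once a third event is added the biased count sets
 * bit 2, which the mask requires to stay clear.
 */
static inline int constraint_demo_add_event(u64 *value, u64 *mask)
{
        const u64 addf = 0x1;   /* carry propagates inside the field */
        const u64 tadd = 0x1;   /* bias so bit 2 is set once count > 2 */
        const u64 avalue = 0x1; /* this event adds 1 to the count */
        const u64 amask = 0x4;  /* top bit of the field must stay 0 */
        u64 nv;

        /* same per-field adder and tests as power_check_constraints() */
        nv = (*value | avalue) + (*value & avalue & addf);
        if ((((nv + tadd) ^ *value) & *mask) != 0 ||
            (((nv + tadd) ^ avalue) & amask) != 0)
                return -1;      /* a third event would over-commit X */
        *value = nv;
        *mask |= amask;
        return 0;
}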
/*
 * Check if newly-added counters have consistent settings for
 * exclude_{user,kernel,hv} with each other and any previously
 * added counters.
 */
static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
{
        int eu, ek, eh;
        int i, n;
        struct perf_counter *counter;

        n = n_prev + n_new;
        if (n <= 1)
                return 0;

        eu = ctrs[0]->hw_event.exclude_user;
        ek = ctrs[0]->hw_event.exclude_kernel;
        eh = ctrs[0]->hw_event.exclude_hv;
        if (n_prev == 0)
                n_prev = 1;
        for (i = n_prev; i < n; ++i) {
                counter = ctrs[i];
                if (counter->hw_event.exclude_user != eu ||
                    counter->hw_event.exclude_kernel != ek ||
                    counter->hw_event.exclude_hv != eh)
                        return -EAGAIN;
        }
        return 0;
}
static void power_perf_read(struct perf_counter *counter)
{
        long val, delta, prev;

        if (!counter->hw.idx)
                return;
        /*
         * Performance monitor interrupts come even when interrupts
         * are soft-disabled, as long as interrupts are hard-enabled.
         * Therefore we treat them like NMIs.
         */
        do {
                prev = atomic64_read(&counter->hw.prev_count);
                barrier();
                val = read_pmc(counter->hw.idx);
        } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);

        /* The counters are only 32 bits wide */
        delta = (val - prev) & 0xfffffffful;
        atomic64_add(delta, &counter->count);
        atomic64_sub(delta, &counter->hw.period_left);
}
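/*
 * Illustrative sketch, not part of the original code: the PMCs are 32
 * bits wide but are read into a wider type, so the distance moved since
 * the last read is computed modulo 2^32.  Masking the difference makes
 * the delta come out right even if the counter wrapped between reads.
 */
static inline long pmc_delta_example(long prev, long val)
{
        /* e.g. prev = 0xfffffff0, val = 0x10 gives a delta of 0x20 */
        return (val - prev) & 0xfffffffful;
}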
/*
 * Disable all counters to prevent PMU interrupts and to allow
 * counters to be added or removed.
 */
u64 hw_perf_save_disable(void)
{
        struct cpu_hw_counters *cpuhw;
        unsigned long ret;
        unsigned long flags;

        local_irq_save(flags);
        cpuhw = &__get_cpu_var(cpu_hw_counters);

        ret = cpuhw->disabled;
        if (!ret) {
                cpuhw->disabled = 1;
                cpuhw->n_added = 0;

                /*
                 * Check if we ever enabled the PMU on this cpu.
                 */
                if (!cpuhw->pmcs_enabled) {
                        if (ppc_md.enable_pmcs)
                                ppc_md.enable_pmcs();
                        cpuhw->pmcs_enabled = 1;
                }

                /*
                 * Set the 'freeze counters' bit.
                 * The barrier is to make sure the mtspr has been
                 * executed and the PMU has frozen the counters
                 * before we return.
                 */
                mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
                mb();
        }
        local_irq_restore(flags);
        return ret;
}
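/*
 * Illustrative usage sketch, not part of the original code: callers
 * bracket any PMU reconfiguration with this save/disable + restore
 * pair (as power_perf_enable/power_perf_disable below do).  The value
 * returned by hw_perf_save_disable() remembers whether the PMU was
 * already disabled, so nested users don't re-enable it too early.
 */
static inline void pmu_reconfig_example(void)
{
        unsigned long flags;
        u64 pmudis;

        local_irq_save(flags);
        pmudis = hw_perf_save_disable();
        /* ... add or remove counters here ... */
        hw_perf_restore(pmudis);
        local_irq_restore(flags);
}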
/*
 * Re-enable all counters if disable == 0.
 * If we were previously disabled and counters were added, then
 * put the new config on the PMU.
 */
void hw_perf_restore(u64 disable)
{
        struct perf_counter *counter;
        struct cpu_hw_counters *cpuhw;
        unsigned long flags;
        long i;
        unsigned long val;
        s64 left;
        unsigned int hwc_index[MAX_HWCOUNTERS];

        if (disable)
                return;
        local_irq_save(flags);
        cpuhw = &__get_cpu_var(cpu_hw_counters);
        cpuhw->disabled = 0;

        /*
         * If we didn't change anything, or only removed counters,
         * no need to recalculate MMCR* settings and reset the PMCs.
         * Just reenable the PMU with the current MMCR* settings
         * (possibly updated for removal of counters).
         */
        if (!cpuhw->n_added) {
                mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
                mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
                mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
                if (cpuhw->n_counters == 0)
                        get_lppaca()->pmcregs_in_use = 0;
                goto out;
        }

        /*
         * Compute MMCR* values for the new set of counters
         */
        if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
                               cpuhw->mmcr)) {
                /* shouldn't ever get here */
                printk(KERN_ERR "oops compute_mmcr failed\n");
                goto out;
        }

        /*
         * Add in MMCR0 freeze bits corresponding to the
         * hw_event.exclude_* bits for the first counter.
         * We have already checked that all counters have the
         * same values for these bits as the first counter.
         */
        counter = cpuhw->counter[0];
        if (counter->hw_event.exclude_user)
                cpuhw->mmcr[0] |= MMCR0_FCP;
        if (counter->hw_event.exclude_kernel)
                cpuhw->mmcr[0] |= MMCR0_FCS;
        if (counter->hw_event.exclude_hv)
                cpuhw->mmcr[0] |= MMCR0_FCHV;

        /*
         * Write the new configuration to MMCR* with the freeze
         * bit set and set the hardware counters to their initial values.
         * Then unfreeze the counters.
         */
        get_lppaca()->pmcregs_in_use = 1;
        mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
        mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
        mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
              | MMCR0_FC);

        /*
         * Read off any pre-existing counters that need to move
         * to another PMC.
         */
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
                if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
                        power_perf_read(counter);
                        write_pmc(counter->hw.idx, 0);
                        counter->hw.idx = 0;
                }
        }

        /*
         * Initialize the PMCs for all the new and moved counters.
         */
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
                if (counter->hw.idx)
                        continue;
                val = 0;
                if (counter->hw_event.irq_period) {
                        left = atomic64_read(&counter->hw.period_left);
                        if (left < 0x80000000L)
                                val = 0x80000000L - left;
                }
                atomic64_set(&counter->hw.prev_count, val);
                counter->hw.idx = hwc_index[i] + 1;
                write_pmc(counter->hw.idx, val);
        }
        mb();
        cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
        mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);

 out:
        local_irq_restore(flags);
}
static int collect_events(struct perf_counter *group, int max_count,
                          struct perf_counter *ctrs[], unsigned int *events)
{
        int n = 0;
        struct perf_counter *counter;

        if (!is_software_counter(group)) {
                if (n >= max_count)
                        return -1;
                ctrs[n] = group;
                events[n++] = group->hw.config;
        }
        list_for_each_entry(counter, &group->sibling_list, list_entry) {
                if (!is_software_counter(counter) &&
                    counter->state != PERF_COUNTER_STATE_OFF) {
                        if (n >= max_count)
                                return -1;
                        ctrs[n] = counter;
                        events[n++] = counter->hw.config;
                }
        }
        return n;
}
static void counter_sched_in(struct perf_counter *counter, int cpu)
{
        counter->state = PERF_COUNTER_STATE_ACTIVE;
        counter->oncpu = cpu;
        if (is_software_counter(counter))
                counter->hw_ops->enable(counter);
}
/*
 * Called to enable a whole group of counters.
 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
 * Assumes the caller has disabled interrupts and has
 * frozen the PMU with hw_perf_save_disable.
 */
int hw_perf_group_sched_in(struct perf_counter *group_leader,
               struct perf_cpu_context *cpuctx,
               struct perf_counter_context *ctx, int cpu)
{
        struct cpu_hw_counters *cpuhw;
        long i, n, n0;
        struct perf_counter *sub;

        cpuhw = &__get_cpu_var(cpu_hw_counters);
        n0 = cpuhw->n_counters;
        n = collect_events(group_leader, ppmu->n_counter - n0,
                           &cpuhw->counter[n0], &cpuhw->events[n0]);
        if (n < 0)
                return -EAGAIN;
        if (check_excludes(cpuhw->counter, n0, n))
                return -EAGAIN;
        if (power_check_constraints(cpuhw->events, n + n0))
                return -EAGAIN;
        cpuhw->n_counters = n0 + n;
        cpuhw->n_added += n;

        /*
         * OK, this group can go on; update counter states etc.,
         * and enable any software counters
         */
        for (i = n0; i < n0 + n; ++i)
                cpuhw->counter[i]->hw.config = cpuhw->events[i];
        cpuctx->active_oncpu += n;
        n = 1;
        counter_sched_in(group_leader, cpu);
        list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
                if (sub->state != PERF_COUNTER_STATE_OFF) {
                        counter_sched_in(sub, cpu);
                        ++n;
                }
        }
        ctx->nr_active += n;

        return 1;
}
/*
 * Add a counter to the PMU.
 * If all counters are not already frozen, then we disable and
 * re-enable the PMU in order to get hw_perf_restore to do the
 * actual work of reconfiguring the PMU.
 */
static int power_perf_enable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuhw;
        unsigned long flags;
        u64 pmudis;
        int n0;
        int ret = -EAGAIN;

        local_irq_save(flags);
        pmudis = hw_perf_save_disable();

        /*
         * Add the counter to the list (if there is room)
         * and check whether the total set is still feasible.
         */
        cpuhw = &__get_cpu_var(cpu_hw_counters);
        n0 = cpuhw->n_counters;
        if (n0 >= ppmu->n_counter)
                goto out;
        cpuhw->counter[n0] = counter;
        cpuhw->events[n0] = counter->hw.config;
        if (check_excludes(cpuhw->counter, n0, 1))
                goto out;
        if (power_check_constraints(cpuhw->events, n0 + 1))
                goto out;

        counter->hw.config = cpuhw->events[n0];
        ++cpuhw->n_counters;
        ++cpuhw->n_added;

        ret = 0;
 out:
        hw_perf_restore(pmudis);
        local_irq_restore(flags);
        return ret;
}
/*
 * Remove a counter from the PMU.
 */
static void power_perf_disable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuhw;
        long i;
        u64 pmudis;
        unsigned long flags;

        local_irq_save(flags);
        pmudis = hw_perf_save_disable();

        power_perf_read(counter);

        cpuhw = &__get_cpu_var(cpu_hw_counters);
        for (i = 0; i < cpuhw->n_counters; ++i) {
                if (counter == cpuhw->counter[i]) {
                        while (++i < cpuhw->n_counters)
                                cpuhw->counter[i-1] = cpuhw->counter[i];
                        --cpuhw->n_counters;
                        ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
                        write_pmc(counter->hw.idx, 0);
                        counter->hw.idx = 0;
                        break;
                }
        }
        if (cpuhw->n_counters == 0) {
                /* disable exceptions if no counters are running */
                cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
        }

        hw_perf_restore(pmudis);
        local_irq_restore(flags);
}
struct hw_perf_counter_ops power_perf_ops = {
        .enable = power_perf_enable,
        .disable = power_perf_disable,
        .read = power_perf_read
};
const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
        unsigned long ev;
        struct perf_counter *ctrs[MAX_HWCOUNTERS];
        unsigned int events[MAX_HWCOUNTERS];
        int n;

        if (!ppmu)
                return NULL;
        if ((s64)counter->hw_event.irq_period < 0)
                return NULL;
        ev = counter->hw_event.type;
        if (!counter->hw_event.raw) {
                if (ev >= ppmu->n_generic ||
                    ppmu->generic_events[ev] == 0)
                        return NULL;
                ev = ppmu->generic_events[ev];
        }
        counter->hw.config_base = ev;
        counter->hw.idx = 0;

        /*
         * If we are not running on a hypervisor, force the
         * exclude_hv bit to 0 so that we don't care what
         * the user set it to.  This also means that we don't
         * set the MMCR0_FCHV bit, which unconditionally freezes
         * the counters on the PPC970 variants used in Apple G5
         * machines (since MSR.HV is always 1 on those machines).
         */
        if (!firmware_has_feature(FW_FEATURE_LPAR))
                counter->hw_event.exclude_hv = 0;

        /*
         * If this is in a group, check if it can go on with all the
         * other hardware counters in the group.  We assume the counter
         * hasn't been linked into its leader's sibling list at this point.
         */
        n = 0;
        if (counter->group_leader != counter) {
                n = collect_events(counter->group_leader, ppmu->n_counter - 1,
                                   ctrs, events);
                if (n < 0)
                        return NULL;
        }
        events[n] = ev;
        ctrs[n] = counter;
        if (check_excludes(ctrs, n, 1))
                return NULL;
        if (power_check_constraints(events, n + 1))
                return NULL;

        counter->hw.config = events[n];
        atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
        return &power_perf_ops;
}
void perf_counter_do_pending(void)
{
        int i;
        struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
        struct perf_counter *counter;

        set_perf_counter_pending(0);
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
                if (counter && counter->wakeup_pending) {
                        counter->wakeup_pending = 0;
                        wake_up(&counter->waitq);
                }
        }
}
/*
 * Record data for an irq counter.
 * This function was lifted from the x86 code; maybe it should
 * go in the core?
 */
static void perf_store_irq_data(struct perf_counter *counter, u64 data)
{
        struct perf_data *irqdata = counter->irqdata;

        if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
                irqdata->overrun++;
        } else {
                u64 *p = (u64 *) &irqdata->data[irqdata->len];

                *p = data;
                irqdata->len += sizeof(u64);
        }
}
/*
 * Record all the values of the counters in a group
 */
static void perf_handle_group(struct perf_counter *counter)
{
        struct perf_counter *leader, *sub;

        leader = counter->group_leader;
        list_for_each_entry(sub, &leader->sibling_list, list_entry) {
                if (sub != counter)
                        sub->hw_ops->read(sub);
                perf_store_irq_data(counter, sub->hw_event.type);
                perf_store_irq_data(counter, atomic64_read(&sub->count));
        }
}
/*
 * A counter has overflowed; update its count and record
 * things if requested.  Note that interrupts are hard-disabled
 * here so there is no possibility of being interrupted.
 */
static void record_and_restart(struct perf_counter *counter, long val,
                               struct pt_regs *regs)
{
        s64 prev, delta, left;
        int record = 0;

        /* we don't have to worry about interrupts here */
        prev = atomic64_read(&counter->hw.prev_count);
        delta = (val - prev) & 0xfffffffful;
        atomic64_add(delta, &counter->count);

        /*
         * See if the total period for this counter has expired,
         * and update for the next period.
         */
        val = 0;
        left = atomic64_read(&counter->hw.period_left) - delta;
        if (counter->hw_event.irq_period) {
                if (left <= 0) {
                        left += counter->hw_event.irq_period;
                        if (left <= 0)
                                left = counter->hw_event.irq_period;
                        record = 1;
                }
                if (left < 0x80000000L)
                        val = 0x80000000L - left;
        }
        write_pmc(counter->hw.idx, val);
        atomic64_set(&counter->hw.prev_count, val);
        atomic64_set(&counter->hw.period_left, left);

        /*
         * Finally record data if requested.
         */
        if (record) {
                switch (counter->hw_event.record_type) {
                case PERF_RECORD_SIMPLE:
                        break;
                case PERF_RECORD_IRQ:
                        perf_store_irq_data(counter, instruction_pointer(regs));
                        counter->wakeup_pending = 1;
                        break;
                case PERF_RECORD_GROUP:
                        perf_handle_group(counter);
                        counter->wakeup_pending = 1;
                        break;
                }
        }
}
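/*
 * Illustrative sketch, not part of the original code: a PMC raises an
 * exception when bit 31 becomes set, so preloading it with
 * 0x80000000 - left makes the next interrupt arrive after "left" more
 * events.  Periods of 2^31 or more are handled by leaving the PMC at 0
 * and carrying the remainder in period_left across several overflows.
 */
static inline unsigned long pmc_preload_example(s64 left)
{
        /* e.g. left = 1000 gives a preload value of 0x7ffffc18 */
        if (left > 0 && left < 0x80000000L)
                return 0x80000000L - left;
        return 0;
}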
/*
 * Performance monitor interrupt stuff
 */
static void perf_counter_interrupt(struct pt_regs *regs)
{
        int i;
        struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
        struct perf_counter *counter;
        long val;
        int need_wakeup = 0, found = 0;

        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
                val = read_pmc(counter->hw.idx);
                if ((int)val < 0) {
                        /* counter has overflowed */
                        found = 1;
                        record_and_restart(counter, val, regs);
                        if (counter->wakeup_pending)
                                need_wakeup = 1;
                }
        }

        /*
         * In case we didn't find and reset the counter that caused
         * the interrupt, scan all counters and reset any that are
         * negative, to avoid getting continual interrupts.
         * Any that we processed in the previous loop will not be negative.
         */
        if (!found) {
                for (i = 0; i < ppmu->n_counter; ++i) {
                        val = read_pmc(i + 1);
                        if ((int)val < 0)
                                write_pmc(i + 1, 0);
                }
        }

        /*
         * Reset MMCR0 to its normal value.  This will set PMXE and
         * clear FC (freeze counters) and PMAO (perf mon alert occurred)
         * and thus allow interrupts to occur again.
         * XXX might want to use MSR.PM to keep the counters frozen until
         * we get back out of this interrupt.
         */
        mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);

        /*
         * If we need a wakeup, check whether interrupts were soft-enabled
         * when we took the interrupt.  If they were, we can wake stuff up
         * immediately; otherwise we'll have to set a flag and do the
         * wakeup when interrupts get soft-enabled.
         */
        if (need_wakeup) {
                if (regs->softe) {
                        irq_enter();
                        perf_counter_do_pending();
                        irq_exit();
                } else {
                        set_perf_counter_pending(1);
                }
        }
}
void hw_perf_counter_setup(int cpu)
{
        struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);

        memset(cpuhw, 0, sizeof(*cpuhw));
        cpuhw->mmcr[0] = MMCR0_FC;
}
extern struct power_pmu ppc970_pmu;
extern struct power_pmu power6_pmu;
static int init_perf_counters(void)
{
        unsigned long pvr;

        if (reserve_pmc_hardware(perf_counter_interrupt)) {
                printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
                return -EBUSY;
        }

        /* XXX should get this from cputable */
        pvr = mfspr(SPRN_PVR);
        switch (PVR_VER(pvr)) {
        case PV_970:
        case PV_970FX:
        case PV_970MP:
                ppmu = &ppc970_pmu;
                break;
        case 0x3e:              /* POWER6 */
                ppmu = &power6_pmu;
                break;
        }
        return 0;
}

arch_initcall(init_perf_counters);
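/*
 * Illustrative sketch, not part of the original code: each supported
 * CPU provides a struct power_pmu (e.g. ppc970_pmu, power6_pmu above)
 * describing its counters.  The outline below only shows the fields
 * this file relies on; the fake_* names and all the numbers are
 * invented for the example, and the callback roles are inferred from
 * how they are used in this file.
 */
#if 0
struct power_pmu fake_pmu = {
        .n_counter        = 6,          /* number of hardware PMCs */
        .add_fields       = 0x1,        /* adder bits for constraint fields */
        .test_adder       = 0x1,        /* detects over-committed fields */
        .compute_mmcr     = fake_compute_mmcr,    /* events[] -> MMCR0/1/A */
        .get_constraint   = fake_get_constraint,  /* event -> (mask, value) */
        .get_alternatives = fake_get_alternatives, /* equivalent event codes */
        .disable_pmc      = fake_disable_pmc,     /* stop one PMC via MMCR* */
        .n_generic        = 1,
        .generic_events   = fake_generic_events,  /* generic -> raw event map */
};
#endif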