perfcounters/powerpc: add support for POWER5+ processors
[linux-2.6-block.git] / arch / powerpc / kernel / perf_counter.c
CommitLineData
4574910e
PM
1/*
2 * Performance counter support - powerpc architecture code
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/perf_counter.h>
14#include <linux/percpu.h>
15#include <linux/hardirq.h>
16#include <asm/reg.h>
17#include <asm/pmc.h>
01d0287f 18#include <asm/machdep.h>
0475f9ea 19#include <asm/firmware.h>
4574910e
PM
20
21struct cpu_hw_counters {
22 int n_counters;
23 int n_percpu;
24 int disabled;
25 int n_added;
26 struct perf_counter *counter[MAX_HWCOUNTERS];
27 unsigned int events[MAX_HWCOUNTERS];
28 u64 mmcr[3];
01d0287f 29 u8 pmcs_enabled;
4574910e
PM
30};
31DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
32
33struct power_pmu *ppmu;
34
d095cd46
PM
35/*
36 * Normally, to ignore kernel events we set the FCS (freeze counters
37 * in supervisor mode) bit in MMCR0, but if the kernel runs with the
38 * hypervisor bit set in the MSR, or if we are running on a processor
39 * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
40 * then we need to use the FCHV bit to ignore kernel events.
41 */
42static unsigned int freeze_counters_kernel = MMCR0_FCS;
43
4574910e
PM
void perf_counter_print_debug(void)
{
	/* Intentionally empty: this implementation prints no debug state. */
}
47
4574910e
PM
48/*
49 * Read one performance monitor counter (PMC).
50 */
51static unsigned long read_pmc(int idx)
52{
53 unsigned long val;
54
55 switch (idx) {
56 case 1:
57 val = mfspr(SPRN_PMC1);
58 break;
59 case 2:
60 val = mfspr(SPRN_PMC2);
61 break;
62 case 3:
63 val = mfspr(SPRN_PMC3);
64 break;
65 case 4:
66 val = mfspr(SPRN_PMC4);
67 break;
68 case 5:
69 val = mfspr(SPRN_PMC5);
70 break;
71 case 6:
72 val = mfspr(SPRN_PMC6);
73 break;
74 case 7:
75 val = mfspr(SPRN_PMC7);
76 break;
77 case 8:
78 val = mfspr(SPRN_PMC8);
79 break;
80 default:
81 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
82 val = 0;
83 }
84 return val;
85}
86
87/*
88 * Write one PMC.
89 */
90static void write_pmc(int idx, unsigned long val)
91{
92 switch (idx) {
93 case 1:
94 mtspr(SPRN_PMC1, val);
95 break;
96 case 2:
97 mtspr(SPRN_PMC2, val);
98 break;
99 case 3:
100 mtspr(SPRN_PMC3, val);
101 break;
102 case 4:
103 mtspr(SPRN_PMC4, val);
104 break;
105 case 5:
106 mtspr(SPRN_PMC5, val);
107 break;
108 case 6:
109 mtspr(SPRN_PMC6, val);
110 break;
111 case 7:
112 mtspr(SPRN_PMC7, val);
113 break;
114 case 8:
115 mtspr(SPRN_PMC8, val);
116 break;
117 default:
118 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
119 }
120}
121
122/*
123 * Check if a set of events can all go on the PMU at once.
124 * If they can't, this will look at alternative codes for the events
125 * and see if any combination of alternative codes is feasible.
126 * The feasible set is returned in event[].
127 */
128static int power_check_constraints(unsigned int event[], int n_ev)
129{
130 u64 mask, value, nv;
131 unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
132 u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
133 u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
134 u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
135 int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
136 int i, j;
137 u64 addf = ppmu->add_fields;
138 u64 tadd = ppmu->test_adder;
139
140 if (n_ev > ppmu->n_counter)
141 return -1;
142
143 /* First see if the events will go on as-is */
144 for (i = 0; i < n_ev; ++i) {
145 alternatives[i][0] = event[i];
146 if (ppmu->get_constraint(event[i], &amasks[i][0],
147 &avalues[i][0]))
148 return -1;
149 choice[i] = 0;
150 }
151 value = mask = 0;
152 for (i = 0; i < n_ev; ++i) {
153 nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
154 if ((((nv + tadd) ^ value) & mask) != 0 ||
155 (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
156 break;
157 value = nv;
158 mask |= amasks[i][0];
159 }
160 if (i == n_ev)
161 return 0; /* all OK */
162
163 /* doesn't work, gather alternatives... */
164 if (!ppmu->get_alternatives)
165 return -1;
166 for (i = 0; i < n_ev; ++i) {
167 n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
168 for (j = 1; j < n_alt[i]; ++j)
169 ppmu->get_constraint(alternatives[i][j],
170 &amasks[i][j], &avalues[i][j]);
171 }
172
173 /* enumerate all possibilities and see if any will work */
174 i = 0;
175 j = -1;
176 value = mask = nv = 0;
177 while (i < n_ev) {
178 if (j >= 0) {
179 /* we're backtracking, restore context */
180 value = svalues[i];
181 mask = smasks[i];
182 j = choice[i];
183 }
184 /*
185 * See if any alternative k for event i,
186 * where k > j, will satisfy the constraints.
187 */
188 while (++j < n_alt[i]) {
189 nv = (value | avalues[i][j]) +
190 (value & avalues[i][j] & addf);
191 if ((((nv + tadd) ^ value) & mask) == 0 &&
192 (((nv + tadd) ^ avalues[i][j])
193 & amasks[i][j]) == 0)
194 break;
195 }
196 if (j >= n_alt[i]) {
197 /*
198 * No feasible alternative, backtrack
199 * to event i-1 and continue enumerating its
200 * alternatives from where we got up to.
201 */
202 if (--i < 0)
203 return -1;
204 } else {
205 /*
206 * Found a feasible alternative for event i,
207 * remember where we got up to with this event,
208 * go on to the next event, and start with
209 * the first alternative for it.
210 */
211 choice[i] = j;
212 svalues[i] = value;
213 smasks[i] = mask;
214 value = nv;
215 mask |= amasks[i][j];
216 ++i;
217 j = -1;
218 }
219 }
220
221 /* OK, we have a feasible combination, tell the caller the solution */
222 for (i = 0; i < n_ev; ++i)
223 event[i] = alternatives[i][choice[i]];
224 return 0;
225}
226
0475f9ea
PM
227/*
228 * Check if newly-added counters have consistent settings for
229 * exclude_{user,kernel,hv} with each other and any previously
230 * added counters.
231 */
232static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
233{
234 int eu, ek, eh;
235 int i, n;
236 struct perf_counter *counter;
237
238 n = n_prev + n_new;
239 if (n <= 1)
240 return 0;
241
242 eu = ctrs[0]->hw_event.exclude_user;
243 ek = ctrs[0]->hw_event.exclude_kernel;
244 eh = ctrs[0]->hw_event.exclude_hv;
245 if (n_prev == 0)
246 n_prev = 1;
247 for (i = n_prev; i < n; ++i) {
248 counter = ctrs[i];
249 if (counter->hw_event.exclude_user != eu ||
250 counter->hw_event.exclude_kernel != ek ||
251 counter->hw_event.exclude_hv != eh)
252 return -EAGAIN;
253 }
254 return 0;
255}
256
4574910e
PM
257static void power_perf_read(struct perf_counter *counter)
258{
259 long val, delta, prev;
260
261 if (!counter->hw.idx)
262 return;
263 /*
264 * Performance monitor interrupts come even when interrupts
265 * are soft-disabled, as long as interrupts are hard-enabled.
266 * Therefore we treat them like NMIs.
267 */
268 do {
269 prev = atomic64_read(&counter->hw.prev_count);
270 barrier();
271 val = read_pmc(counter->hw.idx);
272 } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
273
274 /* The counters are only 32 bits wide */
275 delta = (val - prev) & 0xfffffffful;
276 atomic64_add(delta, &counter->count);
277 atomic64_sub(delta, &counter->hw.period_left);
278}
279
280/*
281 * Disable all counters to prevent PMU interrupts and to allow
282 * counters to be added or removed.
283 */
284u64 hw_perf_save_disable(void)
285{
286 struct cpu_hw_counters *cpuhw;
287 unsigned long ret;
288 unsigned long flags;
289
290 local_irq_save(flags);
291 cpuhw = &__get_cpu_var(cpu_hw_counters);
292
293 ret = cpuhw->disabled;
294 if (!ret) {
295 cpuhw->disabled = 1;
296 cpuhw->n_added = 0;
297
01d0287f
PM
298 /*
299 * Check if we ever enabled the PMU on this cpu.
300 */
301 if (!cpuhw->pmcs_enabled) {
302 if (ppc_md.enable_pmcs)
303 ppc_md.enable_pmcs();
304 cpuhw->pmcs_enabled = 1;
305 }
306
4574910e
PM
307 /*
308 * Set the 'freeze counters' bit.
309 * The barrier is to make sure the mtspr has been
310 * executed and the PMU has frozen the counters
311 * before we return.
312 */
313 mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
314 mb();
315 }
316 local_irq_restore(flags);
317 return ret;
318}
319
320/*
321 * Re-enable all counters if disable == 0.
322 * If we were previously disabled and counters were added, then
323 * put the new config on the PMU.
324 */
325void hw_perf_restore(u64 disable)
326{
327 struct perf_counter *counter;
328 struct cpu_hw_counters *cpuhw;
329 unsigned long flags;
330 long i;
331 unsigned long val;
332 s64 left;
333 unsigned int hwc_index[MAX_HWCOUNTERS];
334
335 if (disable)
336 return;
337 local_irq_save(flags);
338 cpuhw = &__get_cpu_var(cpu_hw_counters);
339 cpuhw->disabled = 0;
340
341 /*
342 * If we didn't change anything, or only removed counters,
343 * no need to recalculate MMCR* settings and reset the PMCs.
344 * Just reenable the PMU with the current MMCR* settings
345 * (possibly updated for removal of counters).
346 */
347 if (!cpuhw->n_added) {
348 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
349 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
350 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
01d0287f
PM
351 if (cpuhw->n_counters == 0)
352 get_lppaca()->pmcregs_in_use = 0;
4574910e
PM
353 goto out;
354 }
355
356 /*
357 * Compute MMCR* values for the new set of counters
358 */
359 if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
360 cpuhw->mmcr)) {
361 /* shouldn't ever get here */
362 printk(KERN_ERR "oops compute_mmcr failed\n");
363 goto out;
364 }
365
0475f9ea
PM
366 /*
367 * Add in MMCR0 freeze bits corresponding to the
368 * hw_event.exclude_* bits for the first counter.
369 * We have already checked that all counters have the
370 * same values for these bits as the first counter.
371 */
372 counter = cpuhw->counter[0];
373 if (counter->hw_event.exclude_user)
374 cpuhw->mmcr[0] |= MMCR0_FCP;
375 if (counter->hw_event.exclude_kernel)
d095cd46 376 cpuhw->mmcr[0] |= freeze_counters_kernel;
0475f9ea
PM
377 if (counter->hw_event.exclude_hv)
378 cpuhw->mmcr[0] |= MMCR0_FCHV;
379
4574910e
PM
380 /*
381 * Write the new configuration to MMCR* with the freeze
382 * bit set and set the hardware counters to their initial values.
383 * Then unfreeze the counters.
384 */
01d0287f 385 get_lppaca()->pmcregs_in_use = 1;
4574910e
PM
386 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
387 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
388 mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
389 | MMCR0_FC);
390
391 /*
392 * Read off any pre-existing counters that need to move
393 * to another PMC.
394 */
395 for (i = 0; i < cpuhw->n_counters; ++i) {
396 counter = cpuhw->counter[i];
397 if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
398 power_perf_read(counter);
399 write_pmc(counter->hw.idx, 0);
400 counter->hw.idx = 0;
401 }
402 }
403
404 /*
405 * Initialize the PMCs for all the new and moved counters.
406 */
407 for (i = 0; i < cpuhw->n_counters; ++i) {
408 counter = cpuhw->counter[i];
409 if (counter->hw.idx)
410 continue;
411 val = 0;
412 if (counter->hw_event.irq_period) {
413 left = atomic64_read(&counter->hw.period_left);
414 if (left < 0x80000000L)
415 val = 0x80000000L - left;
416 }
417 atomic64_set(&counter->hw.prev_count, val);
418 counter->hw.idx = hwc_index[i] + 1;
419 write_pmc(counter->hw.idx, val);
420 }
421 mb();
422 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
423 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
424
425 out:
426 local_irq_restore(flags);
427}
428
429static int collect_events(struct perf_counter *group, int max_count,
430 struct perf_counter *ctrs[], unsigned int *events)
431{
432 int n = 0;
433 struct perf_counter *counter;
434
435 if (!is_software_counter(group)) {
436 if (n >= max_count)
437 return -1;
438 ctrs[n] = group;
439 events[n++] = group->hw.config;
440 }
441 list_for_each_entry(counter, &group->sibling_list, list_entry) {
442 if (!is_software_counter(counter) &&
443 counter->state != PERF_COUNTER_STATE_OFF) {
444 if (n >= max_count)
445 return -1;
446 ctrs[n] = counter;
447 events[n++] = counter->hw.config;
448 }
449 }
450 return n;
451}
452
453static void counter_sched_in(struct perf_counter *counter, int cpu)
454{
455 counter->state = PERF_COUNTER_STATE_ACTIVE;
456 counter->oncpu = cpu;
457 if (is_software_counter(counter))
458 counter->hw_ops->enable(counter);
459}
460
461/*
462 * Called to enable a whole group of counters.
463 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
464 * Assumes the caller has disabled interrupts and has
465 * frozen the PMU with hw_perf_save_disable.
466 */
467int hw_perf_group_sched_in(struct perf_counter *group_leader,
468 struct perf_cpu_context *cpuctx,
469 struct perf_counter_context *ctx, int cpu)
470{
471 struct cpu_hw_counters *cpuhw;
472 long i, n, n0;
473 struct perf_counter *sub;
474
475 cpuhw = &__get_cpu_var(cpu_hw_counters);
476 n0 = cpuhw->n_counters;
477 n = collect_events(group_leader, ppmu->n_counter - n0,
478 &cpuhw->counter[n0], &cpuhw->events[n0]);
479 if (n < 0)
480 return -EAGAIN;
0475f9ea
PM
481 if (check_excludes(cpuhw->counter, n0, n))
482 return -EAGAIN;
4574910e
PM
483 if (power_check_constraints(cpuhw->events, n + n0))
484 return -EAGAIN;
485 cpuhw->n_counters = n0 + n;
486 cpuhw->n_added += n;
487
488 /*
489 * OK, this group can go on; update counter states etc.,
490 * and enable any software counters
491 */
492 for (i = n0; i < n0 + n; ++i)
493 cpuhw->counter[i]->hw.config = cpuhw->events[i];
3b6f9e5c 494 cpuctx->active_oncpu += n;
4574910e
PM
495 n = 1;
496 counter_sched_in(group_leader, cpu);
497 list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
498 if (sub->state != PERF_COUNTER_STATE_OFF) {
499 counter_sched_in(sub, cpu);
500 ++n;
501 }
502 }
4574910e
PM
503 ctx->nr_active += n;
504
505 return 1;
506}
507
508/*
509 * Add a counter to the PMU.
510 * If all counters are not already frozen, then we disable and
511 * re-enable the PMU in order to get hw_perf_restore to do the
512 * actual work of reconfiguring the PMU.
513 */
514static int power_perf_enable(struct perf_counter *counter)
515{
516 struct cpu_hw_counters *cpuhw;
517 unsigned long flags;
518 u64 pmudis;
519 int n0;
520 int ret = -EAGAIN;
521
522 local_irq_save(flags);
523 pmudis = hw_perf_save_disable();
524
525 /*
526 * Add the counter to the list (if there is room)
527 * and check whether the total set is still feasible.
528 */
529 cpuhw = &__get_cpu_var(cpu_hw_counters);
530 n0 = cpuhw->n_counters;
531 if (n0 >= ppmu->n_counter)
532 goto out;
533 cpuhw->counter[n0] = counter;
534 cpuhw->events[n0] = counter->hw.config;
0475f9ea
PM
535 if (check_excludes(cpuhw->counter, n0, 1))
536 goto out;
4574910e
PM
537 if (power_check_constraints(cpuhw->events, n0 + 1))
538 goto out;
539
540 counter->hw.config = cpuhw->events[n0];
541 ++cpuhw->n_counters;
542 ++cpuhw->n_added;
543
544 ret = 0;
545 out:
546 hw_perf_restore(pmudis);
547 local_irq_restore(flags);
548 return ret;
549}
550
551/*
552 * Remove a counter from the PMU.
553 */
554static void power_perf_disable(struct perf_counter *counter)
555{
556 struct cpu_hw_counters *cpuhw;
557 long i;
558 u64 pmudis;
559 unsigned long flags;
560
561 local_irq_save(flags);
562 pmudis = hw_perf_save_disable();
563
564 power_perf_read(counter);
565
566 cpuhw = &__get_cpu_var(cpu_hw_counters);
567 for (i = 0; i < cpuhw->n_counters; ++i) {
568 if (counter == cpuhw->counter[i]) {
569 while (++i < cpuhw->n_counters)
570 cpuhw->counter[i-1] = cpuhw->counter[i];
571 --cpuhw->n_counters;
572 ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
573 write_pmc(counter->hw.idx, 0);
574 counter->hw.idx = 0;
575 break;
576 }
577 }
578 if (cpuhw->n_counters == 0) {
579 /* disable exceptions if no counters are running */
580 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
581 }
582
583 hw_perf_restore(pmudis);
584 local_irq_restore(flags);
585}
586
587struct hw_perf_counter_ops power_perf_ops = {
588 .enable = power_perf_enable,
589 .disable = power_perf_disable,
590 .read = power_perf_read
591};
592
593const struct hw_perf_counter_ops *
594hw_perf_counter_init(struct perf_counter *counter)
595{
596 unsigned long ev;
597 struct perf_counter *ctrs[MAX_HWCOUNTERS];
598 unsigned int events[MAX_HWCOUNTERS];
599 int n;
600
601 if (!ppmu)
602 return NULL;
603 if ((s64)counter->hw_event.irq_period < 0)
604 return NULL;
605 ev = counter->hw_event.type;
606 if (!counter->hw_event.raw) {
607 if (ev >= ppmu->n_generic ||
608 ppmu->generic_events[ev] == 0)
609 return NULL;
610 ev = ppmu->generic_events[ev];
611 }
612 counter->hw.config_base = ev;
613 counter->hw.idx = 0;
614
0475f9ea
PM
615 /*
616 * If we are not running on a hypervisor, force the
617 * exclude_hv bit to 0 so that we don't care what
d095cd46 618 * the user set it to.
0475f9ea
PM
619 */
620 if (!firmware_has_feature(FW_FEATURE_LPAR))
621 counter->hw_event.exclude_hv = 0;
622
4574910e
PM
623 /*
624 * If this is in a group, check if it can go on with all the
625 * other hardware counters in the group. We assume the counter
626 * hasn't been linked into its leader's sibling list at this point.
627 */
628 n = 0;
629 if (counter->group_leader != counter) {
630 n = collect_events(counter->group_leader, ppmu->n_counter - 1,
631 ctrs, events);
632 if (n < 0)
633 return NULL;
634 }
0475f9ea 635 events[n] = ev;
86028598 636 ctrs[n] = counter;
0475f9ea
PM
637 if (check_excludes(ctrs, n, 1))
638 return NULL;
639 if (power_check_constraints(events, n + 1))
4574910e
PM
640 return NULL;
641
0475f9ea 642 counter->hw.config = events[n];
4574910e
PM
643 atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
644 return &power_perf_ops;
645}
646
647/*
648 * Handle wakeups.
649 */
650void perf_counter_do_pending(void)
651{
652 int i;
653 struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
654 struct perf_counter *counter;
655
656 set_perf_counter_pending(0);
657 for (i = 0; i < cpuhw->n_counters; ++i) {
658 counter = cpuhw->counter[i];
659 if (counter && counter->wakeup_pending) {
660 counter->wakeup_pending = 0;
661 wake_up(&counter->waitq);
662 }
663 }
664}
665
666/*
667 * Record data for an irq counter.
668 * This function was lifted from the x86 code; maybe it should
669 * go in the core?
670 */
671static void perf_store_irq_data(struct perf_counter *counter, u64 data)
672{
673 struct perf_data *irqdata = counter->irqdata;
674
675 if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
676 irqdata->overrun++;
677 } else {
678 u64 *p = (u64 *) &irqdata->data[irqdata->len];
679
680 *p = data;
681 irqdata->len += sizeof(u64);
682 }
683}
684
685/*
686 * Record all the values of the counters in a group
687 */
688static void perf_handle_group(struct perf_counter *counter)
689{
690 struct perf_counter *leader, *sub;
691
692 leader = counter->group_leader;
693 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
694 if (sub != counter)
695 sub->hw_ops->read(sub);
696 perf_store_irq_data(counter, sub->hw_event.type);
697 perf_store_irq_data(counter, atomic64_read(&sub->count));
698 }
699}
700
701/*
702 * A counter has overflowed; update its count and record
703 * things if requested. Note that interrupts are hard-disabled
704 * here so there is no possibility of being interrupted.
705 */
706static void record_and_restart(struct perf_counter *counter, long val,
707 struct pt_regs *regs)
708{
709 s64 prev, delta, left;
710 int record = 0;
711
712 /* we don't have to worry about interrupts here */
713 prev = atomic64_read(&counter->hw.prev_count);
714 delta = (val - prev) & 0xfffffffful;
715 atomic64_add(delta, &counter->count);
716
717 /*
718 * See if the total period for this counter has expired,
719 * and update for the next period.
720 */
721 val = 0;
722 left = atomic64_read(&counter->hw.period_left) - delta;
723 if (counter->hw_event.irq_period) {
724 if (left <= 0) {
725 left += counter->hw_event.irq_period;
726 if (left <= 0)
727 left = counter->hw_event.irq_period;
728 record = 1;
729 }
730 if (left < 0x80000000L)
731 val = 0x80000000L - left;
732 }
733 write_pmc(counter->hw.idx, val);
734 atomic64_set(&counter->hw.prev_count, val);
735 atomic64_set(&counter->hw.period_left, left);
736
737 /*
738 * Finally record data if requested.
739 */
740 if (record) {
741 switch (counter->hw_event.record_type) {
742 case PERF_RECORD_SIMPLE:
743 break;
744 case PERF_RECORD_IRQ:
745 perf_store_irq_data(counter, instruction_pointer(regs));
746 counter->wakeup_pending = 1;
747 break;
748 case PERF_RECORD_GROUP:
749 perf_handle_group(counter);
750 counter->wakeup_pending = 1;
751 break;
752 }
753 }
754}
755
756/*
757 * Performance monitor interrupt stuff
758 */
759static void perf_counter_interrupt(struct pt_regs *regs)
760{
761 int i;
762 struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
763 struct perf_counter *counter;
764 long val;
765 int need_wakeup = 0, found = 0;
766
767 for (i = 0; i < cpuhw->n_counters; ++i) {
768 counter = cpuhw->counter[i];
769 val = read_pmc(counter->hw.idx);
770 if ((int)val < 0) {
771 /* counter has overflowed */
772 found = 1;
773 record_and_restart(counter, val, regs);
774 if (counter->wakeup_pending)
775 need_wakeup = 1;
776 }
777 }
778
779 /*
780 * In case we didn't find and reset the counter that caused
781 * the interrupt, scan all counters and reset any that are
782 * negative, to avoid getting continual interrupts.
783 * Any that we processed in the previous loop will not be negative.
784 */
785 if (!found) {
786 for (i = 0; i < ppmu->n_counter; ++i) {
787 val = read_pmc(i + 1);
788 if ((int)val < 0)
789 write_pmc(i + 1, 0);
790 }
791 }
792
793 /*
794 * Reset MMCR0 to its normal value. This will set PMXE and
795 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
796 * and thus allow interrupts to occur again.
797 * XXX might want to use MSR.PM to keep the counters frozen until
798 * we get back out of this interrupt.
799 */
800 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
801
802 /*
803 * If we need a wakeup, check whether interrupts were soft-enabled
804 * when we took the interrupt. If they were, we can wake stuff up
805 * immediately; otherwise we'll have to set a flag and do the
806 * wakeup when interrupts get soft-enabled.
807 */
808 if (need_wakeup) {
809 if (regs->softe) {
810 irq_enter();
811 perf_counter_do_pending();
812 irq_exit();
813 } else {
814 set_perf_counter_pending(1);
815 }
816 }
817}
818
01d0287f
PM
819void hw_perf_counter_setup(int cpu)
820{
821 struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
822
823 memset(cpuhw, 0, sizeof(*cpuhw));
824 cpuhw->mmcr[0] = MMCR0_FC;
825}
826
16b06799 827extern struct power_pmu ppc970_pmu;
742bd95b 828extern struct power_pmu power5_pmu;
aabbaa60 829extern struct power_pmu power5p_pmu;
f7862837 830extern struct power_pmu power6_pmu;
16b06799 831
4574910e
PM
832static int init_perf_counters(void)
833{
16b06799
PM
834 unsigned long pvr;
835
4574910e
PM
836 if (reserve_pmc_hardware(perf_counter_interrupt)) {
837 printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
838 return -EBUSY;
839 }
840
16b06799
PM
841 /* XXX should get this from cputable */
842 pvr = mfspr(SPRN_PVR);
843 switch (PVR_VER(pvr)) {
844 case PV_970:
845 case PV_970FX:
846 case PV_970MP:
847 ppmu = &ppc970_pmu;
848 break;
742bd95b
PM
849 case PV_POWER5:
850 ppmu = &power5_pmu;
851 break;
aabbaa60
PM
852 case PV_POWER5p:
853 ppmu = &power5p_pmu;
854 break;
f7862837
PM
855 case 0x3e:
856 ppmu = &power6_pmu;
857 break;
16b06799 858 }
d095cd46
PM
859
860 /*
861 * Use FCHV to ignore kernel events if MSR.HV is set.
862 */
863 if (mfmsr() & MSR_HV)
864 freeze_counters_kernel = MMCR0_FCHV;
865
4574910e
PM
866 return 0;
867}
868
869arch_initcall(init_perf_counters);