sparc64: Fix comment typo in perf_event.c
arch/sparc/kernel/perf_event.c
/* Performance event support for sparc64.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/cpudata.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */

#define MAX_HWEVENTS                    2
#define MAX_PERIOD                      ((1UL << 32) - 1)

#define PIC_UPPER_INDEX                 0
#define PIC_LOWER_INDEX                 1
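
/* The two 32-bit counters live in a single 64-bit %pic register; as
 * read_pmc() and write_pmc() below treat it, PIC_UPPER_INDEX selects
 * bits 63:32 and PIC_LOWER_INDEX selects bits 31:0.  For example, a
 * read_pic() value of 0x0000012300000456 means the upper counter is
 * at 0x123 and the lower counter at 0x456.
 */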

struct cpu_hw_events {
        struct perf_event       *events[MAX_HWEVENTS];
        unsigned long           used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
        unsigned long           active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
        int enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };

struct perf_event_map {
        u16     encoding;
        u8      pic_mask;
#define PIC_NONE        0x00
#define PIC_UPPER       0x01
#define PIC_LOWER       0x02
};

static unsigned long perf_event_encode(const struct perf_event_map *pmap)
{
        return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
}

static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk)
{
        *msk = val & 0xff;
        *enc = val >> 16;
}
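
/* Example: the ultra3 L1D read-miss entry below has encoding 0x09 and
 * pic_mask PIC_UPPER (0x01), so perf_event_encode() packs it as
 * (0x09 << 16) | 0x01 == 0x00090001, and perf_event_decode() recovers
 * enc == 0x09 and msk == 0x01 from that value.
 */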

#define C(x) PERF_COUNT_HW_CACHE_##x

#define CACHE_OP_UNSUPPORTED    0xfffe
#define CACHE_OP_NONSENSE       0xffff

typedef struct perf_event_map cache_map_t
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];

struct sparc_pmu {
        const struct perf_event_map     *(*event_map)(int);
        const cache_map_t               *cache_map;
        int                             max_events;
        int                             upper_shift;
        int                             lower_shift;
        int                             event_mask;
        int                             hv_bit;
        int                             irq_bit;
        int                             upper_nop;
        int                             lower_nop;
};
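
/* Roughly, each per-chip sparc_pmu descriptor supplies: the
 * generic-to-hardware event tables (event_map, cache_map, max_events),
 * where each counter's event field sits in the control register
 * (upper_shift, lower_shift, event_mask), the optional hypervisor
 * trace and overflow interrupt enable bits (hv_bit, irq_bit), and the
 * "do nothing" event codes used to park an idle counter (upper_nop,
 * lower_nop).
 */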

static const struct perf_event_map ultra3_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3_event_map(int event_id)
{
        return &ultra3_perfmon_event_map[event_id];
}

static const cache_map_t ultra3_cache_map = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
                [C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
                [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
                [C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
};

static const struct sparc_pmu ultra3_pmu = {
        .event_map      = ultra3_event_map,
        .cache_map      = &ultra3_cache_map,
        .max_events     = ARRAY_SIZE(ultra3_perfmon_event_map),
        .upper_shift    = 11,
        .lower_shift    = 4,
        .event_mask     = 0x3f,
        .upper_nop      = 0x1c,
        .lower_nop      = 0x14,
};
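
/* Example with the ultra3 settings above: PERF_COUNT_HW_INSTRUCTIONS
 * has encoding 0x0001 and may use either counter.  Placed on the upper
 * counter it contributes 0x0001 << 11 to the control register; on the
 * lower counter, 0x0001 << 4.  The 0x3f event_mask bounds the field
 * that gets cleared when that counter is reprogrammed.
 */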

/* Niagara1 is very limited.  The upper PIC is hard-locked to count
 * only instructions, so it is free-running, which creates all kinds of
 * problems.  Some hardware designs make one wonder if the creator
 * even looked at how this stuff gets used by software.
 */
static const struct perf_event_map niagara1_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER },
};

static const struct perf_event_map *niagara1_event_map(int event_id)
{
        return &niagara1_perfmon_event_map[event_id];
}

static const cache_map_t niagara1_cache_map = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x00, PIC_UPPER },
                [C(RESULT_MISS)] = { 0x02, PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
                [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x05, PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x04, PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
};

static const struct sparc_pmu niagara1_pmu = {
        .event_map      = niagara1_event_map,
        .cache_map      = &niagara1_cache_map,
        .max_events     = ARRAY_SIZE(niagara1_perfmon_event_map),
        .upper_shift    = 0,
        .lower_shift    = 4,
        .event_mask     = 0x7,
        .upper_nop      = 0x0,
        .lower_nop      = 0x0,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
        return &niagara2_perfmon_event_map[event_id];
}

static const cache_map_t niagara2_cache_map = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
                [ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
                [C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
                [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
},
};

static const struct sparc_pmu niagara2_pmu = {
        .event_map      = niagara2_event_map,
        .cache_map      = &niagara2_cache_map,
        .max_events     = ARRAY_SIZE(niagara2_perfmon_event_map),
        .upper_shift    = 19,
        .lower_shift    = 6,
        .event_mask     = 0xfff,
        .hv_bit         = 0x8,
        .irq_bit        = 0x03,
        .upper_nop      = 0x220,
        .lower_nop      = 0x220,
};
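
/* Note that in this driver only the niagara2 description sets hv_bit
 * and irq_bit; the ultra3 and niagara1 descriptions leave them at
 * zero, so OR-ing them into the control register (as
 * __hw_perf_event_init() and hw_perf_disable() do) is a no-op there.
 */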

static const struct sparc_pmu *sparc_pmu __read_mostly;

static u64 event_encoding(u64 event_id, int idx)
{
        if (idx == PIC_UPPER_INDEX)
                event_id <<= sparc_pmu->upper_shift;
        else
                event_id <<= sparc_pmu->lower_shift;
        return event_id;
}

static u64 mask_for_index(int idx)
{
        return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
        return event_encoding(idx == PIC_UPPER_INDEX ?
                              sparc_pmu->upper_nop :
                              sparc_pmu->lower_nop, idx);
}
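
/* For example, with the ultra3 PMU above, mask_for_index(PIC_UPPER_INDEX)
 * is 0x3f << 11 and nop_for_index(PIC_UPPER_INDEX) is 0x1c << 11,
 * presumably the "sw_count0"/"sw_count1" style NOP-counting encoding
 * described at the top of this file, shifted into the upper counter's
 * event field.
 */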

static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
        u64 val, mask = mask_for_index(idx);

        val = pcr_ops->read();
        pcr_ops->write((val & ~mask) | hwc->config);
}

static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
        u64 mask = mask_for_index(idx);
        u64 nop = nop_for_index(idx);
        u64 val = pcr_ops->read();

        pcr_ops->write((val & ~mask) | nop);
}

void hw_perf_enable(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 val;
        int i;

        if (cpuc->enabled)
                return;

        cpuc->enabled = 1;
        barrier();

        val = pcr_ops->read();

        for (i = 0; i < MAX_HWEVENTS; i++) {
                struct perf_event *cp = cpuc->events[i];
                struct hw_perf_event *hwc;

                if (!cp)
                        continue;
                hwc = &cp->hw;
                val |= hwc->config_base;
        }

        pcr_ops->write(val);
}

void hw_perf_disable(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 val;

        if (!cpuc->enabled)
                return;

        cpuc->enabled = 0;

        val = pcr_ops->read();
        val &= ~(PCR_UTRACE | PCR_STRACE |
                 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
        pcr_ops->write(val);
}
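
/* As the comment at the top of the file explains, the user/supervisor/
 * hypervisor trace enables apply to both counters at once, so clearing
 * PCR_UTRACE, PCR_STRACE and hv_bit here is the only way to stop all
 * sampling; the per-counter event fields are left untouched.
 */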

static u32 read_pmc(int idx)
{
        u64 val;

        read_pic(val);
        if (idx == PIC_UPPER_INDEX)
                val >>= 32;

        return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
        u64 shift, mask, pic;

        shift = 0;
        if (idx == PIC_UPPER_INDEX)
                shift = 32;

        mask = ((u64) 0xffffffff) << shift;
        val <<= shift;

        read_pic(pic);
        pic &= ~mask;
        pic |= val;
        write_pic(pic);
}

static int sparc_perf_event_set_period(struct perf_event *event,
                                       struct hw_perf_event *hwc, int idx)
{
        s64 left = atomic64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        if (unlikely(left <= -period)) {
                left = period;
                atomic64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                atomic64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
        if (left > MAX_PERIOD)
                left = MAX_PERIOD;

        atomic64_set(&hwc->prev_count, (u64)-left);

        write_pmc(idx, (u64)(-left) & 0xffffffff);

        perf_event_update_userpage(event);

        return ret;
}
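
/* Example: with a sample_period of 1000, "left" ends up as 1000 and the
 * counter is programmed to (u64)(-1000) & 0xffffffff == 0xfffffc18, so
 * the next overflow interrupt (the 0xffffffff -> 0 transition) fires
 * after roughly 1000 events.
 */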

static int sparc_pmu_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (test_and_set_bit(idx, cpuc->used_mask))
                return -EAGAIN;

        sparc_pmu_disable_event(hwc, idx);

        cpuc->events[idx] = event;
        set_bit(idx, cpuc->active_mask);

        sparc_perf_event_set_period(event, hwc, idx);
        sparc_pmu_enable_event(hwc, idx);
        perf_event_update_userpage(event);
        return 0;
}

static u64 sparc_perf_event_update(struct perf_event *event,
                                   struct hw_perf_event *hwc, int idx)
{
        int shift = 64 - 32;
        u64 prev_raw_count, new_raw_count;
        s64 delta;

again:
        prev_raw_count = atomic64_read(&hwc->prev_count);
        new_raw_count = read_pmc(idx);

        if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;

        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;

        atomic64_add(delta, &event->count);
        atomic64_sub(delta, &hwc->period_left);

        return new_raw_count;
}
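
/* The shift-by-32 dance above computes the counter delta modulo 2^32,
 * so a wrap of the 32-bit hardware counter is handled correctly: e.g.
 * prev_raw_count 0xfffffff0 and new_raw_count 0x00000005 yields
 * delta == 0x15 (21 events) rather than a large negative number.
 */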

static void sparc_pmu_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        clear_bit(idx, cpuc->active_mask);
        sparc_pmu_disable_event(hwc, idx);

        barrier();

        sparc_perf_event_update(event, hwc, idx);
        cpuc->events[idx] = NULL;
        clear_bit(idx, cpuc->used_mask);

        perf_event_update_userpage(event);
}

static void sparc_pmu_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        sparc_perf_event_update(event, hwc, hwc->idx);
}

static void sparc_pmu_unthrottle(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        sparc_pmu_enable_event(hwc, hwc->idx);
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

void perf_event_grab_pmc(void)
{
        if (atomic_inc_not_zero(&active_events))
                return;

        mutex_lock(&pmc_grab_mutex);
        if (atomic_read(&active_events) == 0) {
                if (atomic_read(&nmi_active) > 0) {
                        on_each_cpu(stop_nmi_watchdog, NULL, 1);
                        BUG_ON(atomic_read(&nmi_active) != 0);
                }
                atomic_inc(&active_events);
        }
        mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
        if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
                if (atomic_read(&nmi_active) == 0)
                        on_each_cpu(start_nmi_watchdog, NULL, 1);
                mutex_unlock(&pmc_grab_mutex);
        }
}
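
/* The performance counters are shared with the NMI watchdog, so the
 * first active event stops the watchdog on every cpu in
 * perf_event_grab_pmc() and the last event to go away restarts it
 * here.
 */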

static const struct perf_event_map *sparc_map_cache_event(u64 config)
{
        unsigned int cache_type, cache_op, cache_result;
        const struct perf_event_map *pmap;

        if (!sparc_pmu->cache_map)
                return ERR_PTR(-ENOENT);

        cache_type = (config >>  0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return ERR_PTR(-EINVAL);

        cache_op = (config >>  8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return ERR_PTR(-EINVAL);

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return ERR_PTR(-EINVAL);

        pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);

        if (pmap->encoding == CACHE_OP_UNSUPPORTED)
                return ERR_PTR(-ENOENT);

        if (pmap->encoding == CACHE_OP_NONSENSE)
                return ERR_PTR(-EINVAL);

        return pmap;
}
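
/* The PERF_TYPE_HW_CACHE config is the usual generic packing, decoded
 * above as: bits 7:0 cache type, bits 15:8 operation, bits 23:16
 * result.  For example, config 0x00000100 asks for L1D write accesses,
 * which in the ultra3 map above is encoding 0x0a on the lower counter.
 */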

static void hw_perf_event_destroy(struct perf_event *event)
{
        perf_event_release_pmc();
}

/* Make sure all events can be scheduled into the hardware at
 * the same time.  This is simplified by the fact that we only
 * need to support 2 simultaneous HW events.
 */
static int sparc_check_constraints(unsigned long *events, int n_ev)
{
        if (n_ev <= perf_max_events) {
                u8 msk1, msk2;
                u16 dummy;

                if (n_ev == 1)
                        return 0;
                BUG_ON(n_ev != 2);
                perf_event_decode(events[0], &dummy, &msk1);
                perf_event_decode(events[1], &dummy, &msk2);

                /* If both events can go on any counter, OK.  */
                if (msk1 == (PIC_UPPER | PIC_LOWER) &&
                    msk2 == (PIC_UPPER | PIC_LOWER))
                        return 0;

                /* If one event is limited to a specific counter,
                 * and the other can go on both, OK.
                 */
                if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
                    msk2 == (PIC_UPPER | PIC_LOWER))
                        return 0;
                if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) &&
                    msk1 == (PIC_UPPER | PIC_LOWER))
                        return 0;

                /* If the events are fixed to different counters, OK.  */
                if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) ||
                    (msk1 == PIC_LOWER && msk2 == PIC_UPPER))
                        return 0;

                /* Otherwise, there is a conflict.  */
        }

        return -1;
}

static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
{
        int eu = 0, ek = 0, eh = 0;
        struct perf_event *event;
        int i, n, first;

        n = n_prev + n_new;
        if (n <= 1)
                return 0;

        first = 1;
        for (i = 0; i < n; i++) {
                event = evts[i];
                if (first) {
                        eu = event->attr.exclude_user;
                        ek = event->attr.exclude_kernel;
                        eh = event->attr.exclude_hv;
                        first = 0;
                } else if (event->attr.exclude_user != eu ||
                           event->attr.exclude_kernel != ek ||
                           event->attr.exclude_hv != eh) {
                        return -EAGAIN;
                }
        }

        return 0;
}

static int collect_events(struct perf_event *group, int max_count,
                          struct perf_event *evts[], unsigned long *events)
{
        struct perf_event *event;
        int n = 0;

        if (!is_software_event(group)) {
                if (n >= max_count)
                        return -1;
                evts[n] = group;
                events[n++] = group->hw.event_base;
        }
        list_for_each_entry(event, &group->sibling_list, group_entry) {
                if (!is_software_event(event) &&
                    event->state != PERF_EVENT_STATE_OFF) {
                        if (n >= max_count)
                                return -1;
                        evts[n] = event;
                        events[n++] = event->hw.event_base;
                }
        }
        return n;
}
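
/* collect_events() walks a group leader and its siblings, skipping pure
 * software events, and records both the perf_event pointers and their
 * packed event_base encodings so that check_excludes() and
 * sparc_check_constraints() can validate the whole group before any
 * hardware state is touched.
 */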

static int __hw_perf_event_init(struct perf_event *event)
{
        struct perf_event_attr *attr = &event->attr;
        struct perf_event *evts[MAX_HWEVENTS];
        struct hw_perf_event *hwc = &event->hw;
        unsigned long events[MAX_HWEVENTS];
        const struct perf_event_map *pmap;
        u64 enc;
        int n;

        if (atomic_read(&nmi_active) < 0)
                return -ENODEV;

        if (attr->type == PERF_TYPE_HARDWARE) {
                if (attr->config >= sparc_pmu->max_events)
                        return -EINVAL;
                pmap = sparc_pmu->event_map(attr->config);
        } else if (attr->type == PERF_TYPE_HW_CACHE) {
                pmap = sparc_map_cache_event(attr->config);
                if (IS_ERR(pmap))
                        return PTR_ERR(pmap);
        } else
                return -EOPNOTSUPP;

        /* We save the enable bits in the config_base.  So to
         * turn off sampling just write 'config', and to enable
         * things write 'config | config_base'.
         */
        hwc->config_base = sparc_pmu->irq_bit;
        if (!attr->exclude_user)
                hwc->config_base |= PCR_UTRACE;
        if (!attr->exclude_kernel)
                hwc->config_base |= PCR_STRACE;
        if (!attr->exclude_hv)
                hwc->config_base |= sparc_pmu->hv_bit;

        hwc->event_base = perf_event_encode(pmap);

        enc = pmap->encoding;

        n = 0;
        if (event->group_leader != event) {
                n = collect_events(event->group_leader,
                                   perf_max_events - 1,
                                   evts, events);
                if (n < 0)
                        return -EINVAL;
        }
        events[n] = hwc->event_base;
        evts[n] = event;

        if (check_excludes(evts, n, 1))
                return -EINVAL;

        if (sparc_check_constraints(events, n + 1))
                return -EINVAL;

        /* Try to do all error checking before this point, as unwinding
         * state after grabbing the PMC is difficult.
         */
        perf_event_grab_pmc();
        event->destroy = hw_perf_event_destroy;

        if (!hwc->sample_period) {
                hwc->sample_period = MAX_PERIOD;
                hwc->last_period = hwc->sample_period;
                atomic64_set(&hwc->period_left, hwc->sample_period);
        }

        if (pmap->pic_mask & PIC_UPPER) {
                hwc->idx = PIC_UPPER_INDEX;
                enc <<= sparc_pmu->upper_shift;
        } else {
                hwc->idx = PIC_LOWER_INDEX;
                enc <<= sparc_pmu->lower_shift;
        }

        hwc->config |= enc;
        return 0;
}

static const struct pmu pmu = {
        .enable         = sparc_pmu_enable,
        .disable        = sparc_pmu_disable,
        .read           = sparc_pmu_read,
        .unthrottle     = sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
        int err = __hw_perf_event_init(event);

        if (err)
                return ERR_PTR(err);
        return &pmu;
}

void perf_event_print_debug(void)
{
        unsigned long flags;
        u64 pcr, pic;
        int cpu;

        if (!sparc_pmu)
                return;

        local_irq_save(flags);

        cpu = smp_processor_id();

        pcr = pcr_ops->read();
        read_pic(pic);

        pr_info("\n");
        pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
                cpu, pcr, pic);

        local_irq_restore(flags);
}

static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
                                            unsigned long cmd, void *__args)
{
        struct die_args *args = __args;
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
        struct pt_regs *regs;
        int idx;

        if (!atomic_read(&active_events))
                return NOTIFY_DONE;

        switch (cmd) {
        case DIE_NMI:
                break;

        default:
                return NOTIFY_DONE;
        }

        regs = args->regs;

        data.addr = 0;

        cpuc = &__get_cpu_var(cpu_hw_events);
        for (idx = 0; idx < MAX_HWEVENTS; idx++) {
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;
                u64 val;

                if (!test_bit(idx, cpuc->active_mask))
                        continue;
                hwc = &event->hw;
                val = sparc_perf_event_update(event, hwc, idx);
                if (val & (1ULL << 31))
                        continue;

                data.period = event->hw.last_period;
                if (!sparc_perf_event_set_period(event, hwc, idx))
                        continue;

                if (perf_event_overflow(event, 1, &data, regs))
                        sparc_pmu_disable_event(hwc, idx);
        }

        return NOTIFY_STOP;
}
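
/* In the handler above, a counter whose updated value still has bit 31
 * set is still counting down toward its programmed (negative) period
 * and therefore was not the source of this overflow interrupt, so it
 * is skipped; only counters that have actually wrapped get a new
 * period and a perf_event_overflow() call.
 */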

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
        .notifier_call          = perf_event_nmi_handler,
};

static bool __init supported_pmu(void)
{
        if (!strcmp(sparc_pmu_type, "ultra3") ||
            !strcmp(sparc_pmu_type, "ultra3+") ||
            !strcmp(sparc_pmu_type, "ultra3i") ||
            !strcmp(sparc_pmu_type, "ultra4+")) {
                sparc_pmu = &ultra3_pmu;
                return true;
        }
        if (!strcmp(sparc_pmu_type, "niagara")) {
                sparc_pmu = &niagara1_pmu;
                return true;
        }
        if (!strcmp(sparc_pmu_type, "niagara2")) {
                sparc_pmu = &niagara2_pmu;
                return true;
        }
        return false;
}

void __init init_hw_perf_events(void)
{
        pr_info("Performance events: ");

        if (!supported_pmu()) {
                pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
                return;
        }

        pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

        /* All sparc64 PMUs currently have 2 events.  But this simple
         * driver only supports one active event at a time.
         */
        perf_max_events = 1;

        register_die_notifier(&perf_event_nmi_notifier);
}