csky: Fix perf record in kernel/user space
[linux-2.6-block.git] / arch / csky / kernel / perf_event.c
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
3
4 #include <linux/errno.h>
5 #include <linux/interrupt.h>
6 #include <linux/module.h>
7 #include <linux/of.h>
8 #include <linux/perf_event.h>
9 #include <linux/platform_device.h>
10
/* Number of hardware counter slots (indexes into the mapping arrays below). */
#define CSKY_PMU_MAX_EVENTS 32
/* Default counter width in bits; csky_pmu.count_width holds the live value. */
#define DEFAULT_COUNT_WIDTH 48

/* Selector strings for the PMU control registers, used with cprcr()/cpwcr(). */
#define HPCR		"<0, 0x0>"	/* PMU Control reg */
#define HPSPR		"<0, 0x1>"	/* Start PC reg */
#define HPEPR		"<0, 0x2>"	/* End PC reg */
#define HPSIR		"<0, 0x3>"	/* Soft Counter reg */
#define HPCNTENR	"<0, 0x4>"	/* Count Enable reg */
#define HPINTENR	"<0, 0x5>"	/* Interrupt Enable reg */
#define HPOFSR		"<0, 0x6>"	/* Interrupt Status reg */
21
/* The events for a given PMU register set. */
struct pmu_hw_events {
	/*
	 * The events that are active on the PMU for the given index.
	 */
	struct perf_event *events[CSKY_PMU_MAX_EVENTS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(CSKY_PMU_MAX_EVENTS)];
};

/* Per-index accessors for the raw 64-bit counter value; NULL if absent. */
static uint64_t (*hw_raw_read_mapping[CSKY_PMU_MAX_EVENTS])(void);
static void (*hw_raw_write_mapping[CSKY_PMU_MAX_EVENTS])(uint64_t val);

/* The single system-wide csky PMU instance. */
static struct csky_pmu_t {
	struct pmu			pmu;
	struct pmu_hw_events __percpu	*hw_events;
	struct platform_device		*plat_device;
	uint32_t			count_width;	/* hw counter width, bits */
	uint32_t			hpcr;		/* cached HPCR enable value */
	u64				max_period;
} csky_pmu;
static int csky_pmu_irq;	/* overflow interrupt number */

#define to_csky_pmu(p)	(container_of(p, struct csky_pmu, pmu))
50
/*
 * Coprocessor accessors. "reg" is a selector string such as "<0, 0x3>".
 * cprgr()/cpwgr() read/write general (counter) registers, while
 * cprcr()/cpwcr() read/write control registers. The "memory" clobber
 * keeps the compiler from reordering these around other accesses.
 */
#define cprgr(reg)				\
({						\
	unsigned int tmp;			\
	asm volatile("cprgr %0, "reg"\n"	\
		     : "=r"(tmp)		\
		     :				\
		     : "memory");		\
	tmp;					\
})

#define cpwgr(reg, val)		\
({				\
	asm volatile(		\
	"cpwgr %0, "reg"\n"	\
	:			\
	: "r"(val)		\
	: "memory");		\
})

#define cprcr(reg)				\
({						\
	unsigned int tmp;			\
	asm volatile("cprcr %0, "reg"\n"	\
		     : "=r"(tmp)		\
		     :				\
		     : "memory");		\
	tmp;					\
})

#define cpwcr(reg, val)		\
({				\
	asm volatile(		\
	"cpwcr %0, "reg"\n"	\
	:			\
	: "r"(val)		\
	: "memory");		\
})
88
89 /* cycle counter */
90 static uint64_t csky_pmu_read_cc(void)
91 {
92         uint32_t lo, hi, tmp;
93         uint64_t result;
94
95         do {
96                 tmp = cprgr("<0, 0x3>");
97                 lo  = cprgr("<0, 0x2>");
98                 hi  = cprgr("<0, 0x3>");
99         } while (hi != tmp);
100
101         result = (uint64_t) (hi) << 32;
102         result |= lo;
103
104         return result;
105 }
106
107 static void csky_pmu_write_cc(uint64_t val)
108 {
109         cpwgr("<0, 0x2>", (uint32_t)  val);
110         cpwgr("<0, 0x3>", (uint32_t) (val >> 32));
111 }
112
113 /* instruction counter */
114 static uint64_t csky_pmu_read_ic(void)
115 {
116         uint32_t lo, hi, tmp;
117         uint64_t result;
118
119         do {
120                 tmp = cprgr("<0, 0x5>");
121                 lo  = cprgr("<0, 0x4>");
122                 hi  = cprgr("<0, 0x5>");
123         } while (hi != tmp);
124
125         result = (uint64_t) (hi) << 32;
126         result |= lo;
127
128         return result;
129 }
130
131 static void csky_pmu_write_ic(uint64_t val)
132 {
133         cpwgr("<0, 0x4>", (uint32_t)  val);
134         cpwgr("<0, 0x5>", (uint32_t) (val >> 32));
135 }
136
137 /* l1 icache access counter */
138 static uint64_t csky_pmu_read_icac(void)
139 {
140         uint32_t lo, hi, tmp;
141         uint64_t result;
142
143         do {
144                 tmp = cprgr("<0, 0x7>");
145                 lo  = cprgr("<0, 0x6>");
146                 hi  = cprgr("<0, 0x7>");
147         } while (hi != tmp);
148
149         result = (uint64_t) (hi) << 32;
150         result |= lo;
151
152         return result;
153 }
154
155 static void csky_pmu_write_icac(uint64_t val)
156 {
157         cpwgr("<0, 0x6>", (uint32_t)  val);
158         cpwgr("<0, 0x7>", (uint32_t) (val >> 32));
159 }
160
161 /* l1 icache miss counter */
162 static uint64_t csky_pmu_read_icmc(void)
163 {
164         uint32_t lo, hi, tmp;
165         uint64_t result;
166
167         do {
168                 tmp = cprgr("<0, 0x9>");
169                 lo  = cprgr("<0, 0x8>");
170                 hi  = cprgr("<0, 0x9>");
171         } while (hi != tmp);
172
173         result = (uint64_t) (hi) << 32;
174         result |= lo;
175
176         return result;
177 }
178
179 static void csky_pmu_write_icmc(uint64_t val)
180 {
181         cpwgr("<0, 0x8>", (uint32_t)  val);
182         cpwgr("<0, 0x9>", (uint32_t) (val >> 32));
183 }
184
185 /* l1 dcache access counter */
186 static uint64_t csky_pmu_read_dcac(void)
187 {
188         uint32_t lo, hi, tmp;
189         uint64_t result;
190
191         do {
192                 tmp = cprgr("<0, 0xb>");
193                 lo  = cprgr("<0, 0xa>");
194                 hi  = cprgr("<0, 0xb>");
195         } while (hi != tmp);
196
197         result = (uint64_t) (hi) << 32;
198         result |= lo;
199
200         return result;
201 }
202
203 static void csky_pmu_write_dcac(uint64_t val)
204 {
205         cpwgr("<0, 0xa>", (uint32_t)  val);
206         cpwgr("<0, 0xb>", (uint32_t) (val >> 32));
207 }
208
209 /* l1 dcache miss counter */
210 static uint64_t csky_pmu_read_dcmc(void)
211 {
212         uint32_t lo, hi, tmp;
213         uint64_t result;
214
215         do {
216                 tmp = cprgr("<0, 0xd>");
217                 lo  = cprgr("<0, 0xc>");
218                 hi  = cprgr("<0, 0xd>");
219         } while (hi != tmp);
220
221         result = (uint64_t) (hi) << 32;
222         result |= lo;
223
224         return result;
225 }
226
227 static void csky_pmu_write_dcmc(uint64_t val)
228 {
229         cpwgr("<0, 0xc>", (uint32_t)  val);
230         cpwgr("<0, 0xd>", (uint32_t) (val >> 32));
231 }
232
233 /* l2 cache access counter */
234 static uint64_t csky_pmu_read_l2ac(void)
235 {
236         uint32_t lo, hi, tmp;
237         uint64_t result;
238
239         do {
240                 tmp = cprgr("<0, 0xf>");
241                 lo  = cprgr("<0, 0xe>");
242                 hi  = cprgr("<0, 0xf>");
243         } while (hi != tmp);
244
245         result = (uint64_t) (hi) << 32;
246         result |= lo;
247
248         return result;
249 }
250
251 static void csky_pmu_write_l2ac(uint64_t val)
252 {
253         cpwgr("<0, 0xe>", (uint32_t)  val);
254         cpwgr("<0, 0xf>", (uint32_t) (val >> 32));
255 }
256
257 /* l2 cache miss counter */
258 static uint64_t csky_pmu_read_l2mc(void)
259 {
260         uint32_t lo, hi, tmp;
261         uint64_t result;
262
263         do {
264                 tmp = cprgr("<0, 0x11>");
265                 lo  = cprgr("<0, 0x10>");
266                 hi  = cprgr("<0, 0x11>");
267         } while (hi != tmp);
268
269         result = (uint64_t) (hi) << 32;
270         result |= lo;
271
272         return result;
273 }
274
275 static void csky_pmu_write_l2mc(uint64_t val)
276 {
277         cpwgr("<0, 0x10>", (uint32_t)  val);
278         cpwgr("<0, 0x11>", (uint32_t) (val >> 32));
279 }
280
281 /* I-UTLB miss counter */
282 static uint64_t csky_pmu_read_iutlbmc(void)
283 {
284         uint32_t lo, hi, tmp;
285         uint64_t result;
286
287         do {
288                 tmp = cprgr("<0, 0x15>");
289                 lo  = cprgr("<0, 0x14>");
290                 hi  = cprgr("<0, 0x15>");
291         } while (hi != tmp);
292
293         result = (uint64_t) (hi) << 32;
294         result |= lo;
295
296         return result;
297 }
298
299 static void csky_pmu_write_iutlbmc(uint64_t val)
300 {
301         cpwgr("<0, 0x14>", (uint32_t)  val);
302         cpwgr("<0, 0x15>", (uint32_t) (val >> 32));
303 }
304
305 /* D-UTLB miss counter */
306 static uint64_t csky_pmu_read_dutlbmc(void)
307 {
308         uint32_t lo, hi, tmp;
309         uint64_t result;
310
311         do {
312                 tmp = cprgr("<0, 0x17>");
313                 lo  = cprgr("<0, 0x16>");
314                 hi  = cprgr("<0, 0x17>");
315         } while (hi != tmp);
316
317         result = (uint64_t) (hi) << 32;
318         result |= lo;
319
320         return result;
321 }
322
323 static void csky_pmu_write_dutlbmc(uint64_t val)
324 {
325         cpwgr("<0, 0x16>", (uint32_t)  val);
326         cpwgr("<0, 0x17>", (uint32_t) (val >> 32));
327 }
328
329 /* JTLB miss counter */
330 static uint64_t csky_pmu_read_jtlbmc(void)
331 {
332         uint32_t lo, hi, tmp;
333         uint64_t result;
334
335         do {
336                 tmp = cprgr("<0, 0x19>");
337                 lo  = cprgr("<0, 0x18>");
338                 hi  = cprgr("<0, 0x19>");
339         } while (hi != tmp);
340
341         result = (uint64_t) (hi) << 32;
342         result |= lo;
343
344         return result;
345 }
346
347 static void csky_pmu_write_jtlbmc(uint64_t val)
348 {
349         cpwgr("<0, 0x18>", (uint32_t)  val);
350         cpwgr("<0, 0x19>", (uint32_t) (val >> 32));
351 }
352
353 /* software counter */
354 static uint64_t csky_pmu_read_softc(void)
355 {
356         uint32_t lo, hi, tmp;
357         uint64_t result;
358
359         do {
360                 tmp = cprgr("<0, 0x1b>");
361                 lo  = cprgr("<0, 0x1a>");
362                 hi  = cprgr("<0, 0x1b>");
363         } while (hi != tmp);
364
365         result = (uint64_t) (hi) << 32;
366         result |= lo;
367
368         return result;
369 }
370
371 static void csky_pmu_write_softc(uint64_t val)
372 {
373         cpwgr("<0, 0x1a>", (uint32_t)  val);
374         cpwgr("<0, 0x1b>", (uint32_t) (val >> 32));
375 }
376
377 /* conditional branch mispredict counter */
378 static uint64_t csky_pmu_read_cbmc(void)
379 {
380         uint32_t lo, hi, tmp;
381         uint64_t result;
382
383         do {
384                 tmp = cprgr("<0, 0x1d>");
385                 lo  = cprgr("<0, 0x1c>");
386                 hi  = cprgr("<0, 0x1d>");
387         } while (hi != tmp);
388
389         result = (uint64_t) (hi) << 32;
390         result |= lo;
391
392         return result;
393 }
394
395 static void csky_pmu_write_cbmc(uint64_t val)
396 {
397         cpwgr("<0, 0x1c>", (uint32_t)  val);
398         cpwgr("<0, 0x1d>", (uint32_t) (val >> 32));
399 }
400
401 /* conditional branch instruction counter */
402 static uint64_t csky_pmu_read_cbic(void)
403 {
404         uint32_t lo, hi, tmp;
405         uint64_t result;
406
407         do {
408                 tmp = cprgr("<0, 0x1f>");
409                 lo  = cprgr("<0, 0x1e>");
410                 hi  = cprgr("<0, 0x1f>");
411         } while (hi != tmp);
412
413         result = (uint64_t) (hi) << 32;
414         result |= lo;
415
416         return result;
417 }
418
419 static void csky_pmu_write_cbic(uint64_t val)
420 {
421         cpwgr("<0, 0x1e>", (uint32_t)  val);
422         cpwgr("<0, 0x1f>", (uint32_t) (val >> 32));
423 }
424
425 /* indirect branch mispredict counter */
426 static uint64_t csky_pmu_read_ibmc(void)
427 {
428         uint32_t lo, hi, tmp;
429         uint64_t result;
430
431         do {
432                 tmp = cprgr("<0, 0x21>");
433                 lo  = cprgr("<0, 0x20>");
434                 hi  = cprgr("<0, 0x21>");
435         } while (hi != tmp);
436
437         result = (uint64_t) (hi) << 32;
438         result |= lo;
439
440         return result;
441 }
442
443 static void csky_pmu_write_ibmc(uint64_t val)
444 {
445         cpwgr("<0, 0x20>", (uint32_t)  val);
446         cpwgr("<0, 0x21>", (uint32_t) (val >> 32));
447 }
448
449 /* indirect branch instruction counter */
450 static uint64_t csky_pmu_read_ibic(void)
451 {
452         uint32_t lo, hi, tmp;
453         uint64_t result;
454
455         do {
456                 tmp = cprgr("<0, 0x23>");
457                 lo  = cprgr("<0, 0x22>");
458                 hi  = cprgr("<0, 0x23>");
459         } while (hi != tmp);
460
461         result = (uint64_t) (hi) << 32;
462         result |= lo;
463
464         return result;
465 }
466
467 static void csky_pmu_write_ibic(uint64_t val)
468 {
469         cpwgr("<0, 0x22>", (uint32_t)  val);
470         cpwgr("<0, 0x23>", (uint32_t) (val >> 32));
471 }
472
473 /* LSU spec fail counter */
474 static uint64_t csky_pmu_read_lsfc(void)
475 {
476         uint32_t lo, hi, tmp;
477         uint64_t result;
478
479         do {
480                 tmp = cprgr("<0, 0x25>");
481                 lo  = cprgr("<0, 0x24>");
482                 hi  = cprgr("<0, 0x25>");
483         } while (hi != tmp);
484
485         result = (uint64_t) (hi) << 32;
486         result |= lo;
487
488         return result;
489 }
490
491 static void csky_pmu_write_lsfc(uint64_t val)
492 {
493         cpwgr("<0, 0x24>", (uint32_t)  val);
494         cpwgr("<0, 0x25>", (uint32_t) (val >> 32));
495 }
496
497 /* store instruction counter */
498 static uint64_t csky_pmu_read_sic(void)
499 {
500         uint32_t lo, hi, tmp;
501         uint64_t result;
502
503         do {
504                 tmp = cprgr("<0, 0x27>");
505                 lo  = cprgr("<0, 0x26>");
506                 hi  = cprgr("<0, 0x27>");
507         } while (hi != tmp);
508
509         result = (uint64_t) (hi) << 32;
510         result |= lo;
511
512         return result;
513 }
514
515 static void csky_pmu_write_sic(uint64_t val)
516 {
517         cpwgr("<0, 0x26>", (uint32_t)  val);
518         cpwgr("<0, 0x27>", (uint32_t) (val >> 32));
519 }
520
521 /* dcache read access counter */
522 static uint64_t csky_pmu_read_dcrac(void)
523 {
524         uint32_t lo, hi, tmp;
525         uint64_t result;
526
527         do {
528                 tmp = cprgr("<0, 0x29>");
529                 lo  = cprgr("<0, 0x28>");
530                 hi  = cprgr("<0, 0x29>");
531         } while (hi != tmp);
532
533         result = (uint64_t) (hi) << 32;
534         result |= lo;
535
536         return result;
537 }
538
539 static void csky_pmu_write_dcrac(uint64_t val)
540 {
541         cpwgr("<0, 0x28>", (uint32_t)  val);
542         cpwgr("<0, 0x29>", (uint32_t) (val >> 32));
543 }
544
545 /* dcache read miss counter */
546 static uint64_t csky_pmu_read_dcrmc(void)
547 {
548         uint32_t lo, hi, tmp;
549         uint64_t result;
550
551         do {
552                 tmp = cprgr("<0, 0x2b>");
553                 lo  = cprgr("<0, 0x2a>");
554                 hi  = cprgr("<0, 0x2b>");
555         } while (hi != tmp);
556
557         result = (uint64_t) (hi) << 32;
558         result |= lo;
559
560         return result;
561 }
562
563 static void csky_pmu_write_dcrmc(uint64_t val)
564 {
565         cpwgr("<0, 0x2a>", (uint32_t)  val);
566         cpwgr("<0, 0x2b>", (uint32_t) (val >> 32));
567 }
568
569 /* dcache write access counter */
570 static uint64_t csky_pmu_read_dcwac(void)
571 {
572         uint32_t lo, hi, tmp;
573         uint64_t result;
574
575         do {
576                 tmp = cprgr("<0, 0x2d>");
577                 lo  = cprgr("<0, 0x2c>");
578                 hi  = cprgr("<0, 0x2d>");
579         } while (hi != tmp);
580
581         result = (uint64_t) (hi) << 32;
582         result |= lo;
583
584         return result;
585 }
586
587 static void csky_pmu_write_dcwac(uint64_t val)
588 {
589         cpwgr("<0, 0x2c>", (uint32_t)  val);
590         cpwgr("<0, 0x2d>", (uint32_t) (val >> 32));
591 }
592
593 /* dcache write miss counter */
594 static uint64_t csky_pmu_read_dcwmc(void)
595 {
596         uint32_t lo, hi, tmp;
597         uint64_t result;
598
599         do {
600                 tmp = cprgr("<0, 0x2f>");
601                 lo  = cprgr("<0, 0x2e>");
602                 hi  = cprgr("<0, 0x2f>");
603         } while (hi != tmp);
604
605         result = (uint64_t) (hi) << 32;
606         result |= lo;
607
608         return result;
609 }
610
611 static void csky_pmu_write_dcwmc(uint64_t val)
612 {
613         cpwgr("<0, 0x2e>", (uint32_t)  val);
614         cpwgr("<0, 0x2f>", (uint32_t) (val >> 32));
615 }
616
617 /* l2cache read access counter */
618 static uint64_t csky_pmu_read_l2rac(void)
619 {
620         uint32_t lo, hi, tmp;
621         uint64_t result;
622
623         do {
624                 tmp = cprgr("<0, 0x31>");
625                 lo  = cprgr("<0, 0x30>");
626                 hi  = cprgr("<0, 0x31>");
627         } while (hi != tmp);
628
629         result = (uint64_t) (hi) << 32;
630         result |= lo;
631
632         return result;
633 }
634
635 static void csky_pmu_write_l2rac(uint64_t val)
636 {
637         cpwgr("<0, 0x30>", (uint32_t)  val);
638         cpwgr("<0, 0x31>", (uint32_t) (val >> 32));
639 }
640
641 /* l2cache read miss counter */
642 static uint64_t csky_pmu_read_l2rmc(void)
643 {
644         uint32_t lo, hi, tmp;
645         uint64_t result;
646
647         do {
648                 tmp = cprgr("<0, 0x33>");
649                 lo  = cprgr("<0, 0x32>");
650                 hi  = cprgr("<0, 0x33>");
651         } while (hi != tmp);
652
653         result = (uint64_t) (hi) << 32;
654         result |= lo;
655
656         return result;
657 }
658
659 static void csky_pmu_write_l2rmc(uint64_t val)
660 {
661         cpwgr("<0, 0x32>", (uint32_t)  val);
662         cpwgr("<0, 0x33>", (uint32_t) (val >> 32));
663 }
664
665 /* l2cache write access counter */
666 static uint64_t csky_pmu_read_l2wac(void)
667 {
668         uint32_t lo, hi, tmp;
669         uint64_t result;
670
671         do {
672                 tmp = cprgr("<0, 0x35>");
673                 lo  = cprgr("<0, 0x34>");
674                 hi  = cprgr("<0, 0x35>");
675         } while (hi != tmp);
676
677         result = (uint64_t) (hi) << 32;
678         result |= lo;
679
680         return result;
681 }
682
683 static void csky_pmu_write_l2wac(uint64_t val)
684 {
685         cpwgr("<0, 0x34>", (uint32_t)  val);
686         cpwgr("<0, 0x35>", (uint32_t) (val >> 32));
687 }
688
689 /* l2cache write miss counter */
690 static uint64_t csky_pmu_read_l2wmc(void)
691 {
692         uint32_t lo, hi, tmp;
693         uint64_t result;
694
695         do {
696                 tmp = cprgr("<0, 0x37>");
697                 lo  = cprgr("<0, 0x36>");
698                 hi  = cprgr("<0, 0x37>");
699         } while (hi != tmp);
700
701         result = (uint64_t) (hi) << 32;
702         result |= lo;
703
704         return result;
705 }
706
707 static void csky_pmu_write_l2wmc(uint64_t val)
708 {
709         cpwgr("<0, 0x36>", (uint32_t)  val);
710         cpwgr("<0, 0x37>", (uint32_t) (val >> 32));
711 }
712
#define HW_OP_UNSUPPORTED	0xffff
/*
 * Generic hardware event id -> csky counter index. The index is later
 * used to select entries in hw_raw_read_mapping[]/hw_raw_write_mapping[].
 */
static const int csky_pmu_hw_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x1,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x2,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0xf,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0xe,
	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_REF_CPU_CYCLES]		= HW_OP_UNSUPPORTED,
};
726
#define C(_x)			PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED	0xffff
/*
 * Generic cache event (type x op x result) -> csky counter index;
 * CACHE_OP_UNSUPPORTED where the hardware has no matching counter.
 */
static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x14,
			[C(RESULT_MISS)]	= 0x15,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= 0x16,
			[C(RESULT_MISS)]	= 0x17,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= 0x5,
			[C(RESULT_MISS)]	= 0x6,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x3,
			[C(RESULT_MISS)]	= 0x4,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x18,
			[C(RESULT_MISS)]	= 0x19,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= 0x1a,
			[C(RESULT_MISS)]	= 0x1b,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= 0x7,
			[C(RESULT_MISS)]	= 0x8,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x5,
			[C(RESULT_MISS)]	= 0xb,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x3,
			[C(RESULT_MISS)]	= 0xa,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};
829
/*
 * Program the hardware counter so it overflows after the event's
 * remaining sample period. Returns 1 if a new period was started
 * (the previous one expired), 0 otherwise.
 */
int  csky_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	/* Far behind (more than a whole period): restart with a full period. */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	/* Period expired: begin the next one. */
	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	/* The counter can only advance max_period before overflowing. */
	if (left > (s64)csky_pmu.max_period)
		left = csky_pmu.max_period;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future "deltas":
	 */
	local64_set(&hwc->prev_count, (u64)(-left));

	/* Counter counts up from -left, so it overflows after "left" events. */
	if (hw_raw_write_mapping[hwc->idx] != NULL)
		hw_raw_write_mapping[hwc->idx]((u64)(-left) &
						csky_pmu.max_period);

	/* Clear any stale overflow status bit for this counter. */
	cpwcr(HPOFSR, ~BIT(hwc->idx) & cprcr(HPOFSR));

	perf_event_update_userpage(event);

	return ret;
}
870
/*
 * Fold the current hardware count into event->count and consume the
 * corresponding amount of the remaining sample period.
 */
static void csky_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc)
{
	uint64_t prev_raw_count = local64_read(&hwc->prev_count);
	/*
	 * Sign extend count value to 64bit, otherwise delta calculation
	 * would be incorrect when overflow occurs.
	 */
	uint64_t new_raw_count = sign_extend64(
		hw_raw_read_mapping[hwc->idx](), csky_pmu.count_width - 1);
	int64_t delta = new_raw_count - prev_raw_count;

	/*
	 * We aren't afraid of hwc->prev_count changing beneath our feet
	 * because there's no way for us to re-enter this function anytime.
	 */
	local64_set(&hwc->prev_count, new_raw_count);
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}
891
/*
 * Reset the PMU via HPCR. NOTE(review): BIT(31)/BIT(30) appear to be
 * reset/clear controls with BIT(1) a base enable — confirm against the
 * C-SKY HPCR register layout before relying on this description.
 */
static void csky_pmu_reset(void *info)
{
	cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1));
}
896
/* pmu::read callback: refresh event->count from the hardware counter. */
static void csky_pmu_read(struct perf_event *event)
{
	csky_perf_event_update(event, &event->hw);
}
901
902 static int csky_pmu_cache_event(u64 config)
903 {
904         unsigned int cache_type, cache_op, cache_result;
905
906         cache_type      = (config >>  0) & 0xff;
907         cache_op        = (config >>  8) & 0xff;
908         cache_result    = (config >> 16) & 0xff;
909
910         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
911                 return -EINVAL;
912         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
913                 return -EINVAL;
914         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
915                 return -EINVAL;
916
917         return csky_pmu_cache_map[cache_type][cache_op][cache_result];
918 }
919
920 static int csky_pmu_event_init(struct perf_event *event)
921 {
922         struct hw_perf_event *hwc = &event->hw;
923         int ret;
924
925         switch (event->attr.type) {
926         case PERF_TYPE_HARDWARE:
927                 if (event->attr.config >= PERF_COUNT_HW_MAX)
928                         return -ENOENT;
929                 ret = csky_pmu_hw_map[event->attr.config];
930                 if (ret == HW_OP_UNSUPPORTED)
931                         return -ENOENT;
932                 hwc->idx = ret;
933                 break;
934         case PERF_TYPE_HW_CACHE:
935                 ret = csky_pmu_cache_event(event->attr.config);
936                 if (ret == CACHE_OP_UNSUPPORTED)
937                         return -ENOENT;
938                 hwc->idx = ret;
939                 break;
940         case PERF_TYPE_RAW:
941                 if (hw_raw_read_mapping[event->attr.config] == NULL)
942                         return -ENOENT;
943                 hwc->idx = event->attr.config;
944                 break;
945         default:
946                 return -ENOENT;
947         }
948
949         if (event->attr.exclude_user)
950                 csky_pmu.hpcr = BIT(2);
951         else if (event->attr.exclude_kernel)
952                 csky_pmu.hpcr = BIT(3);
953         else
954                 csky_pmu.hpcr = BIT(2) | BIT(3);
955
956         csky_pmu.hpcr |= BIT(1) | BIT(0);
957
958         return 0;
959 }
960
/* starts all counters: apply the HPCR value prepared by event_init */
static void csky_pmu_enable(struct pmu *pmu)
{
	cpwcr(HPCR, csky_pmu.hpcr);
}

/* stops all counters by writing back only HPCR BIT(1) */
static void csky_pmu_disable(struct pmu *pmu)
{
	cpwcr(HPCR, BIT(1));
}
972
/* pmu::start callback: program the sample period and enable one counter. */
static void csky_pmu_start(struct perf_event *event, int flags)
{
	unsigned long flg;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (WARN_ON_ONCE(idx == -1))
		return;

	/* PERF_EF_RELOAD is only meaningful for an up-to-date event. */
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;

	csky_pmu_event_set_period(event);

	/* RMW of the shared enable registers must not race with the IRQ. */
	local_irq_save(flg);

	cpwcr(HPINTENR, BIT(idx) | cprcr(HPINTENR));
	cpwcr(HPCNTENR, BIT(idx) | cprcr(HPCNTENR));

	local_irq_restore(flg);
}
996
/* Disable counting and overflow interrupt delivery for one counter. */
static void csky_pmu_stop_event(struct perf_event *event)
{
	unsigned long flg;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	/* RMW of the shared enable registers must not race with the IRQ. */
	local_irq_save(flg);

	cpwcr(HPINTENR, ~BIT(idx) & cprcr(HPINTENR));
	cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR));

	local_irq_restore(flg);
}
1010
1011 static void csky_pmu_stop(struct perf_event *event, int flags)
1012 {
1013         if (!(event->hw.state & PERF_HES_STOPPED)) {
1014                 csky_pmu_stop_event(event);
1015                 event->hw.state |= PERF_HES_STOPPED;
1016         }
1017
1018         if ((flags & PERF_EF_UPDATE) &&
1019             !(event->hw.state & PERF_HES_UPTODATE)) {
1020                 csky_perf_event_update(event, &event->hw);
1021                 event->hw.state |= PERF_HES_UPTODATE;
1022         }
1023 }
1024
/* pmu::del callback: stop the event and remove it from the active table. */
static void csky_pmu_del(struct perf_event *event, int flags)
{
	struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events);
	struct hw_perf_event *hwc = &event->hw;

	csky_pmu_stop(event, PERF_EF_UPDATE);

	hw_events->events[hwc->idx] = NULL;

	perf_event_update_userpage(event);
}
1036
1037 /* allocate hardware counter and optionally start counting */
1038 static int csky_pmu_add(struct perf_event *event, int flags)
1039 {
1040         struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events);
1041         struct hw_perf_event *hwc = &event->hw;
1042
1043         hw_events->events[hwc->idx] = event;
1044
1045         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1046
1047         if (flags & PERF_EF_START)
1048                 csky_pmu_start(event, PERF_EF_RELOAD);
1049
1050         perf_event_update_userpage(event);
1051
1052         return 0;
1053 }
1054
/*
 * Per-cpu PMU overflow interrupt handler.
 *
 * All counters share one interrupt line; HPOFSR holds a per-counter
 * overflow status bit.  The PMU is globally disabled while the overflow
 * bits are processed so counters do not advance under us, then re-enabled.
 *
 * Returns IRQ_NONE if no overflow bit was set (shared-line spurious irq),
 * IRQ_HANDLED otherwise.
 */
static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev)
{
	struct perf_sample_data data;
	struct pmu_hw_events *cpuc = this_cpu_ptr(csky_pmu.hw_events);
	struct pt_regs *regs;
	int idx;

	/*
	 * Did an overflow occur?
	 */
	if (!cprcr(HPOFSR))
		return IRQ_NONE;

	/*
	 * Handle the counter(s) overflow(s)
	 */
	regs = get_irq_regs();

	/* Freeze the PMU while we walk the overflow status bits. */
	csky_pmu_disable(&csky_pmu.pmu);

	for (idx = 0; idx < CSKY_PMU_MAX_EVENTS; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		/* Ignore if we don't have an event. */
		if (!event)
			continue;
		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!(cprcr(HPOFSR) & BIT(idx)))
			continue;

		/* Accumulate the wrapped count, then rearm the period. */
		hwc = &event->hw;
		csky_perf_event_update(event, &event->hw);
		perf_sample_data_init(&data, 0, hwc->last_period);
		csky_pmu_event_set_period(event);

		/* Non-zero means the event is throttled: stop its counter. */
		if (perf_event_overflow(event, &data, regs))
			csky_pmu_stop_event(event);
	}

	csky_pmu_enable(&csky_pmu.pmu);

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}
1111
1112 static int csky_pmu_request_irq(irq_handler_t handler)
1113 {
1114         int err, irqs;
1115         struct platform_device *pmu_device = csky_pmu.plat_device;
1116
1117         if (!pmu_device)
1118                 return -ENODEV;
1119
1120         irqs = min(pmu_device->num_resources, num_possible_cpus());
1121         if (irqs < 1) {
1122                 pr_err("no irqs for PMUs defined\n");
1123                 return -ENODEV;
1124         }
1125
1126         csky_pmu_irq = platform_get_irq(pmu_device, 0);
1127         if (csky_pmu_irq < 0)
1128                 return -ENODEV;
1129         err = request_percpu_irq(csky_pmu_irq, handler, "csky-pmu",
1130                                  this_cpu_ptr(csky_pmu.hw_events));
1131         if (err) {
1132                 pr_err("unable to request IRQ%d for CSKY PMU counters\n",
1133                        csky_pmu_irq);
1134                 return err;
1135         }
1136
1137         return 0;
1138 }
1139
1140 static void csky_pmu_free_irq(void)
1141 {
1142         int irq;
1143         struct platform_device *pmu_device = csky_pmu.plat_device;
1144
1145         irq = platform_get_irq(pmu_device, 0);
1146         if (irq >= 0)
1147                 free_percpu_irq(irq, this_cpu_ptr(csky_pmu.hw_events));
1148 }
1149
1150 int init_hw_perf_events(void)
1151 {
1152         csky_pmu.hw_events = alloc_percpu_gfp(struct pmu_hw_events,
1153                                               GFP_KERNEL);
1154         if (!csky_pmu.hw_events) {
1155                 pr_info("failed to allocate per-cpu PMU data.\n");
1156                 return -ENOMEM;
1157         }
1158
1159         csky_pmu.pmu = (struct pmu) {
1160                 .pmu_enable     = csky_pmu_enable,
1161                 .pmu_disable    = csky_pmu_disable,
1162                 .event_init     = csky_pmu_event_init,
1163                 .add            = csky_pmu_add,
1164                 .del            = csky_pmu_del,
1165                 .start          = csky_pmu_start,
1166                 .stop           = csky_pmu_stop,
1167                 .read           = csky_pmu_read,
1168         };
1169
1170         memset((void *)hw_raw_read_mapping, 0,
1171                 sizeof(hw_raw_read_mapping[CSKY_PMU_MAX_EVENTS]));
1172
1173         hw_raw_read_mapping[0x1]  = csky_pmu_read_cc;
1174         hw_raw_read_mapping[0x2]  = csky_pmu_read_ic;
1175         hw_raw_read_mapping[0x3]  = csky_pmu_read_icac;
1176         hw_raw_read_mapping[0x4]  = csky_pmu_read_icmc;
1177         hw_raw_read_mapping[0x5]  = csky_pmu_read_dcac;
1178         hw_raw_read_mapping[0x6]  = csky_pmu_read_dcmc;
1179         hw_raw_read_mapping[0x7]  = csky_pmu_read_l2ac;
1180         hw_raw_read_mapping[0x8]  = csky_pmu_read_l2mc;
1181         hw_raw_read_mapping[0xa]  = csky_pmu_read_iutlbmc;
1182         hw_raw_read_mapping[0xb]  = csky_pmu_read_dutlbmc;
1183         hw_raw_read_mapping[0xc]  = csky_pmu_read_jtlbmc;
1184         hw_raw_read_mapping[0xd]  = csky_pmu_read_softc;
1185         hw_raw_read_mapping[0xe]  = csky_pmu_read_cbmc;
1186         hw_raw_read_mapping[0xf]  = csky_pmu_read_cbic;
1187         hw_raw_read_mapping[0x10] = csky_pmu_read_ibmc;
1188         hw_raw_read_mapping[0x11] = csky_pmu_read_ibic;
1189         hw_raw_read_mapping[0x12] = csky_pmu_read_lsfc;
1190         hw_raw_read_mapping[0x13] = csky_pmu_read_sic;
1191         hw_raw_read_mapping[0x14] = csky_pmu_read_dcrac;
1192         hw_raw_read_mapping[0x15] = csky_pmu_read_dcrmc;
1193         hw_raw_read_mapping[0x16] = csky_pmu_read_dcwac;
1194         hw_raw_read_mapping[0x17] = csky_pmu_read_dcwmc;
1195         hw_raw_read_mapping[0x18] = csky_pmu_read_l2rac;
1196         hw_raw_read_mapping[0x19] = csky_pmu_read_l2rmc;
1197         hw_raw_read_mapping[0x1a] = csky_pmu_read_l2wac;
1198         hw_raw_read_mapping[0x1b] = csky_pmu_read_l2wmc;
1199
1200         memset((void *)hw_raw_write_mapping, 0,
1201                 sizeof(hw_raw_write_mapping[CSKY_PMU_MAX_EVENTS]));
1202
1203         hw_raw_write_mapping[0x1]  = csky_pmu_write_cc;
1204         hw_raw_write_mapping[0x2]  = csky_pmu_write_ic;
1205         hw_raw_write_mapping[0x3]  = csky_pmu_write_icac;
1206         hw_raw_write_mapping[0x4]  = csky_pmu_write_icmc;
1207         hw_raw_write_mapping[0x5]  = csky_pmu_write_dcac;
1208         hw_raw_write_mapping[0x6]  = csky_pmu_write_dcmc;
1209         hw_raw_write_mapping[0x7]  = csky_pmu_write_l2ac;
1210         hw_raw_write_mapping[0x8]  = csky_pmu_write_l2mc;
1211         hw_raw_write_mapping[0xa]  = csky_pmu_write_iutlbmc;
1212         hw_raw_write_mapping[0xb]  = csky_pmu_write_dutlbmc;
1213         hw_raw_write_mapping[0xc]  = csky_pmu_write_jtlbmc;
1214         hw_raw_write_mapping[0xd]  = csky_pmu_write_softc;
1215         hw_raw_write_mapping[0xe]  = csky_pmu_write_cbmc;
1216         hw_raw_write_mapping[0xf]  = csky_pmu_write_cbic;
1217         hw_raw_write_mapping[0x10] = csky_pmu_write_ibmc;
1218         hw_raw_write_mapping[0x11] = csky_pmu_write_ibic;
1219         hw_raw_write_mapping[0x12] = csky_pmu_write_lsfc;
1220         hw_raw_write_mapping[0x13] = csky_pmu_write_sic;
1221         hw_raw_write_mapping[0x14] = csky_pmu_write_dcrac;
1222         hw_raw_write_mapping[0x15] = csky_pmu_write_dcrmc;
1223         hw_raw_write_mapping[0x16] = csky_pmu_write_dcwac;
1224         hw_raw_write_mapping[0x17] = csky_pmu_write_dcwmc;
1225         hw_raw_write_mapping[0x18] = csky_pmu_write_l2rac;
1226         hw_raw_write_mapping[0x19] = csky_pmu_write_l2rmc;
1227         hw_raw_write_mapping[0x1a] = csky_pmu_write_l2wac;
1228         hw_raw_write_mapping[0x1b] = csky_pmu_write_l2wmc;
1229
1230         return 0;
1231 }
1232
/* cpuhp callback: enable the percpu PMU irq on a cpu coming online. */
static int csky_pmu_starting_cpu(unsigned int cpu)
{
	enable_percpu_irq(csky_pmu_irq, 0);
	return 0;
}
1238
/* cpuhp callback: disable the percpu PMU irq on a cpu going offline. */
static int csky_pmu_dying_cpu(unsigned int cpu)
{
	disable_percpu_irq(csky_pmu_irq);
	return 0;
}
1244
1245 int csky_pmu_device_probe(struct platform_device *pdev,
1246                           const struct of_device_id *of_table)
1247 {
1248         struct device_node *node = pdev->dev.of_node;
1249         int ret;
1250
1251         ret = init_hw_perf_events();
1252         if (ret) {
1253                 pr_notice("[perf] failed to probe PMU!\n");
1254                 return ret;
1255         }
1256
1257         if (of_property_read_u32(node, "count-width",
1258                                  &csky_pmu.count_width)) {
1259                 csky_pmu.count_width = DEFAULT_COUNT_WIDTH;
1260         }
1261         csky_pmu.max_period = BIT(csky_pmu.count_width) - 1;
1262
1263         csky_pmu.plat_device = pdev;
1264
1265         /* Ensure the PMU has sane values out of reset. */
1266         on_each_cpu(csky_pmu_reset, &csky_pmu, 1);
1267
1268         ret = csky_pmu_request_irq(csky_pmu_handle_irq);
1269         if (ret) {
1270                 csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1271                 pr_notice("[perf] PMU request irq fail!\n");
1272         }
1273
1274         ret = cpuhp_setup_state(CPUHP_AP_PERF_ONLINE, "AP_PERF_ONLINE",
1275                                 csky_pmu_starting_cpu,
1276                                 csky_pmu_dying_cpu);
1277         if (ret) {
1278                 csky_pmu_free_irq();
1279                 free_percpu(csky_pmu.hw_events);
1280                 return ret;
1281         }
1282
1283         ret = perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW);
1284         if (ret) {
1285                 csky_pmu_free_irq();
1286                 free_percpu(csky_pmu.hw_events);
1287         }
1288
1289         return ret;
1290 }
1291
1292 const static struct of_device_id csky_pmu_of_device_ids[] = {
1293         {.compatible = "csky,csky-pmu"},
1294         {},
1295 };
1296
/* platform_driver ->probe: delegate to the shared probe helper. */
static int csky_pmu_dev_probe(struct platform_device *pdev)
{
	return csky_pmu_device_probe(pdev, csky_pmu_of_device_ids);
}
1301
1302 static struct platform_driver csky_pmu_driver = {
1303         .driver = {
1304                    .name = "csky-pmu",
1305                    .of_match_table = csky_pmu_of_device_ids,
1306                    },
1307         .probe = csky_pmu_dev_probe,
1308 };
1309
1310 static int __init csky_pmu_probe(void)
1311 {
1312         int ret;
1313
1314         ret = platform_driver_register(&csky_pmu_driver);
1315         if (ret)
1316                 pr_notice("[perf] PMU initialization failed\n");
1317         else
1318                 pr_notice("[perf] PMU initialization done\n");
1319
1320         return ret;
1321 }
1322
1323 device_initcall(csky_pmu_probe);