arch/powerpc/perf/generic-compat-pmu.c
// SPDX-License-Identifier: GPL-2.0+
//
// Copyright 2019 Madhavan Srinivasan, IBM Corporation.

#define pr_fmt(fmt)     "generic-compat-pmu: " fmt

#include "isa207-common.h"

/*
 * Raw event encoding:
 *
 *        60        56        52        48        44        40        36        32
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *
 *        28        24        20        16        12         8         4         0
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *                                 [ pmc ]                       [    pmcxsel    ]
 */
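
/*
 * Illustrative sketch only, not part of the driver: how a raw event code
 * decomposes per the layout above. The helper names are hypothetical; the
 * real field extraction lives in isa207-common.h.
 *
 *      static inline unsigned int example_event_pmc(u64 event)
 *      {
 *              return (event >> 16) & 0xf;     // [ pmc ], config:16-19
 *      }
 *
 *      static inline unsigned int example_event_pmcxsel(u64 event)
 *      {
 *              return event & 0xff;            // [ pmcxsel ], config:0-7
 *      }
 *
 * e.g. PM_CYC (0x600f4) selects PMC 6 with pmcxsel 0xf4.
 */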

/*
 * Event codes defined in ISA v3.0B
 */
#define EVENT(_name, _code)     _name = _code,

enum {
        /* Cycles, alternate code */
        EVENT(PM_CYC_ALT,                       0x100f0)
        /* One or more instructions completed in a cycle */
        EVENT(PM_CYC_INST_CMPL,                 0x100f2)
        /* Floating-point instruction completed */
        EVENT(PM_FLOP_CMPL,                     0x100f4)
        /* Instruction ERAT/L1-TLB miss */
        EVENT(PM_L1_ITLB_MISS,                  0x100f6)
        /* All instructions completed and none available */
        EVENT(PM_NO_INST_AVAIL,                 0x100f8)
        /* A load-type instruction completed (ISA v3.0+) */
        EVENT(PM_LD_CMPL,                       0x100fc)
        /* Instruction completed, alternate code (ISA v3.0+) */
        EVENT(PM_INST_CMPL_ALT,                 0x100fe)
        /* A store-type instruction completed */
        EVENT(PM_ST_CMPL,                       0x200f0)
        /* Instruction dispatched */
        EVENT(PM_INST_DISP,                     0x200f2)
        /* Run cycles */
        EVENT(PM_RUN_CYC,                       0x200f4)
        /* Data ERAT/L1-TLB miss/reload */
        EVENT(PM_L1_DTLB_RELOAD,                0x200f6)
        /* Taken branch completed */
        EVENT(PM_BR_TAKEN_CMPL,                 0x200fa)
        /* Demand icache miss */
        EVENT(PM_L1_ICACHE_MISS,                0x200fc)
        /* L1 Dcache reload from memory */
        EVENT(PM_L1_RELOAD_FROM_MEM,            0x200fe)
        /* L1 Dcache store miss */
        EVENT(PM_ST_MISS_L1,                    0x300f0)
        /* Alternate code for PM_INST_DISP */
        EVENT(PM_INST_DISP_ALT,                 0x300f2)
        /* Branch direction or target mispredicted */
        EVENT(PM_BR_MISPREDICT,                 0x300f6)
        /* Data TLB miss/reload */
        EVENT(PM_DTLB_MISS,                     0x300fc)
        /* Demand load - L3 miss (not L2 hit and not L3 hit) */
        EVENT(PM_DATA_FROM_L3MISS,              0x300fe)
        /* L1 Dcache load miss */
        EVENT(PM_LD_MISS_L1,                    0x400f0)
        /* Cycle when instruction(s) dispatched */
        EVENT(PM_CYC_INST_DISP,                 0x400f2)
        /* Branch or branch target mispredicted */
        EVENT(PM_BR_MPRED_CMPL,                 0x400f6)
        /* Instructions completed with run latch set */
        EVENT(PM_RUN_INST_CMPL,                 0x400fa)
        /* Instruction TLB miss/reload */
        EVENT(PM_ITLB_MISS,                     0x400fc)
        /* Load data not cached */
        EVENT(PM_LD_NOT_CACHED,                 0x400fe)
        /* Instructions completed */
        EVENT(PM_INST_CMPL,                     0x500fa)
        /* Cycles */
        EVENT(PM_CYC,                           0x600f4)
};

#undef EVENT

/*
 * Table of event alternatives. Rows are sorted in increasing order of
 * column 0, and within each row column 0 must be the smallest entry.
 */
static const unsigned int generic_event_alternatives[][MAX_ALT] = {
        { PM_CYC_ALT,                   PM_CYC },
        { PM_INST_CMPL_ALT,             PM_INST_CMPL },
        { PM_INST_DISP,                 PM_INST_DISP_ALT },
};

static int generic_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
        return isa207_get_alternatives(event, alt,
                                       ARRAY_SIZE(generic_event_alternatives),
                                       flags, generic_event_alternatives);
}
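
/*
 * Illustrative sketch, not driver code: alternatives let the core event
 * scheduler move an event between PMCs, e.g. cycles can be counted either
 * as PM_CYC (0x600f4, PMC 6) or PM_CYC_ALT (0x100f0, PMC 1):
 *
 *      u64 alt[MAX_ALT];
 *      int n = generic_get_alternatives(PM_CYC, 0, alt);
 *      // alt[0..n-1] holds the equivalent encodings for PM_CYC, here
 *      // { PM_CYC, PM_CYC_ALT } (exact order/count is up to
 *      // isa207_get_alternatives)
 */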

GENERIC_EVENT_ATTR(cpu-cycles,                  PM_CYC);
GENERIC_EVENT_ATTR(instructions,                PM_INST_CMPL);
GENERIC_EVENT_ATTR(stalled-cycles-frontend,     PM_NO_INST_AVAIL);
GENERIC_EVENT_ATTR(branch-misses,               PM_BR_MPRED_CMPL);
GENERIC_EVENT_ATTR(cache-misses,                PM_LD_MISS_L1);

CACHE_EVENT_ATTR(L1-dcache-load-misses,         PM_LD_MISS_L1);
CACHE_EVENT_ATTR(L1-dcache-store-misses,        PM_ST_MISS_L1);
CACHE_EVENT_ATTR(L1-icache-load-misses,         PM_L1_ICACHE_MISS);
CACHE_EVENT_ATTR(LLC-load-misses,               PM_DATA_FROM_L3MISS);
CACHE_EVENT_ATTR(branch-load-misses,            PM_BR_MPRED_CMPL);
CACHE_EVENT_ATTR(dTLB-load-misses,              PM_DTLB_MISS);
CACHE_EVENT_ATTR(iTLB-load-misses,              PM_ITLB_MISS);

static struct attribute *generic_compat_events_attr[] = {
        GENERIC_EVENT_PTR(PM_CYC),
        GENERIC_EVENT_PTR(PM_INST_CMPL),
        GENERIC_EVENT_PTR(PM_NO_INST_AVAIL),
        GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
        GENERIC_EVENT_PTR(PM_LD_MISS_L1),
        CACHE_EVENT_PTR(PM_LD_MISS_L1),
        CACHE_EVENT_PTR(PM_ST_MISS_L1),
        CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
        CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
        CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
        CACHE_EVENT_PTR(PM_DTLB_MISS),
        CACHE_EVENT_PTR(PM_ITLB_MISS),
        NULL
};

static const struct attribute_group generic_compat_pmu_events_group = {
        .name = "events",
        .attrs = generic_compat_events_attr,
};

PMU_FORMAT_ATTR(event,          "config:0-19");
PMU_FORMAT_ATTR(pmcxsel,        "config:0-7");
PMU_FORMAT_ATTR(pmc,            "config:16-19");

static struct attribute *generic_compat_pmu_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_pmcxsel.attr,
        &format_attr_pmc.attr,
        NULL,
};

static const struct attribute_group generic_compat_pmu_format_group = {
        .name = "format",
        .attrs = generic_compat_pmu_format_attr,
};

static struct attribute *generic_compat_pmu_caps_attrs[] = {
        NULL
};

static struct attribute_group generic_compat_pmu_caps_group = {
        .name  = "caps",
        .attrs = generic_compat_pmu_caps_attrs,
};

static const struct attribute_group *generic_compat_pmu_attr_groups[] = {
        &generic_compat_pmu_format_group,
        &generic_compat_pmu_events_group,
        &generic_compat_pmu_caps_group,
        NULL,
};
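
/*
 * Illustrative note, not driver code: once the PMU is registered, these
 * groups surface in sysfs. Assuming the core registers this PMU under the
 * usual "cpu" name, the paths would look like:
 *
 *      /sys/bus/event_source/devices/cpu/format/event   -> "config:0-19"
 *      /sys/bus/event_source/devices/cpu/events/cpu-cycles
 */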

static int compat_generic_events[] = {
        [PERF_COUNT_HW_CPU_CYCLES] =                    PM_CYC,
        [PERF_COUNT_HW_INSTRUCTIONS] =                  PM_INST_CMPL,
        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =       PM_NO_INST_AVAIL,
        [PERF_COUNT_HW_BRANCH_MISSES] =                 PM_BR_MPRED_CMPL,
        [PERF_COUNT_HW_CACHE_MISSES] =                  PM_LD_MISS_L1,
};
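
/*
 * Example (perf uAPI, not driver code): a generic hardware event such as
 * PERF_COUNT_HW_CPU_CYCLES resolves through this table to PM_CYC when
 * userspace opens:
 *
 *      struct perf_event_attr attr = {
 *              .type   = PERF_TYPE_HARDWARE,
 *              .config = PERF_COUNT_HW_CPU_CYCLES,
 *      };
 */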

#define C(x)    PERF_COUNT_HW_CACHE_##x

/*
 * Table of generalized cache-related events.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes (see the usage sketch after the table).
 */
static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        [ C(L1D) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = PM_LD_MISS_L1,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = PM_ST_MISS_L1,
                },
                [ C(OP_PREFETCH) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = 0,
                },
        },
        [ C(L1I) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = PM_L1_ICACHE_MISS,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = -1,
                },
                [ C(OP_PREFETCH) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = 0,
                },
        },
        [ C(LL) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = 0,
                },
                [ C(OP_PREFETCH) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = 0,
                },
        },
        [ C(DTLB) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = PM_DTLB_MISS,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
                        [ C(RESULT_MISS)   ] = -1,
                },
                [ C(OP_PREFETCH) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
                        [ C(RESULT_MISS)   ] = -1,
                },
        },
        [ C(ITLB) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = PM_ITLB_MISS,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
                        [ C(RESULT_MISS)   ] = -1,
                },
                [ C(OP_PREFETCH) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
                        [ C(RESULT_MISS)   ] = -1,
                },
        },
        [ C(BPU) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = 0,
                        [ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
                        [ C(RESULT_MISS)   ] = -1,
                },
                [ C(OP_PREFETCH) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
                        [ C(RESULT_MISS)   ] = -1,
                },
        },
        [ C(NODE) ] = {
                [ C(OP_READ) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
                        [ C(RESULT_MISS)   ] = -1,
                },
                [ C(OP_WRITE) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
                        [ C(RESULT_MISS)   ] = -1,
                },
                [ C(OP_PREFETCH) ] = {
                        [ C(RESULT_ACCESS) ] = -1,
                        [ C(RESULT_MISS)   ] = -1,
                },
        },
};

#undef C
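
/*
 * Example (perf uAPI, not driver code): an L1D read miss maps through the
 * table above to PM_LD_MISS_L1 when userspace opens:
 *
 *      struct perf_event_attr attr = {
 *              .type   = PERF_TYPE_HW_CACHE,
 *              .config = PERF_COUNT_HW_CACHE_L1D |
 *                        (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *                        (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
 *      };
 */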

/*
 * PMC5 and PMC6 are the fixed-function instruction and cycle counters.
 * We set MMCR0[CC5-6RUN] so they count regardless of the run latch,
 * which lets us use counters 5 and 6 for PM_INST_CMPL and PM_CYC.
 */
static int generic_compute_mmcr(u64 event[], int n_ev,
                                unsigned int hwc[], struct mmcr_regs *mmcr,
                                struct perf_event *pevents[], u32 flags)
{
        int ret;

        ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
        if (!ret)
                mmcr->mmcr0 |= MMCR0_C56RUN;
        return ret;
}

static struct power_pmu generic_compat_pmu = {
        .name                   = "ISAv3",
        .n_counter              = MAX_PMU_COUNTERS,
        .add_fields             = ISA207_ADD_FIELDS,
        .test_adder             = ISA207_TEST_ADDER,
        .compute_mmcr           = generic_compute_mmcr,
        .get_constraint         = isa207_get_constraint,
        .get_alternatives       = generic_get_alternatives,
        .disable_pmc            = isa207_disable_pmc,
        .flags                  = PPMU_HAS_SIER | PPMU_ARCH_207S,
        .n_generic              = ARRAY_SIZE(compat_generic_events),
        .generic_events         = compat_generic_events,
        .cache_events           = &generic_compat_cache_events,
        .attr_groups            = generic_compat_pmu_attr_groups,
};

int __init init_generic_compat_pmu(void)
{
        int rc;

        /*
         * From ISA v2.07 on, PMU features are architected;
         * we require >= v3.0 because (a) that has PM_LD_CMPL and
         * PM_INST_CMPL_ALT, which v2.07 doesn't have, and
         * (b) we don't expect any non-IBM Power ISA
         * implementations that conform to v2.07 but not v3.0.
         */
        if (!cpu_has_feature(CPU_FTR_ARCH_300))
                return -ENODEV;

        rc = register_power_pmu(&generic_compat_pmu);
        if (rc)
                return rc;

        /* Tell userspace that EBB is supported */
        cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;

        return 0;
}
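
/*
 * Usage sketch (illustrative, not part of this file): with the PMU
 * registered, a raw event can be opened through the perf syscall using
 * the encoding documented at the top of this file, e.g.
 *
 *      struct perf_event_attr attr = {
 *              .type   = PERF_TYPE_RAW,
 *              .config = 0x600f4,      // PM_CYC: pmc 6, pmcxsel 0xf4
 *      };
 */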