Commit | Line | Data |
---|---|---|
e9991434 AP |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * RISC-V performance counter support. | |
4 | * | |
5 | * Copyright (C) 2021 Western Digital Corporation or its affiliates. | |
6 | * | |
7 | * This code is based on ARM perf event code which is in turn based on | |
8 | * sparc64 and x86 code. | |
9 | */ | |
10 | ||
11 | #define pr_fmt(fmt) "riscv-pmu-sbi: " fmt | |
12 | ||
13 | #include <linux/mod_devicetable.h> | |
14 | #include <linux/perf/riscv_pmu.h> | |
15 | #include <linux/platform_device.h> | |
4905ec2f AP |
16 | #include <linux/irq.h> |
17 | #include <linux/irqdomain.h> | |
18 | #include <linux/of_irq.h> | |
19 | #include <linux/of.h> | |
e9a023f2 | 20 | #include <linux/cpu_pm.h> |
096b52fd | 21 | #include <linux/sched/clock.h> |
e9991434 | 22 | |
65e9fb08 | 23 | #include <asm/errata_list.h> |
e9991434 | 24 | #include <asm/sbi.h> |
e72c4333 | 25 | #include <asm/cpufeature.h> |
e9991434 | 26 | |
cc4c07c8 AG |
/* Values taken by sysctl_perf_user_access. */
#define SYSCTL_NO_USER_ACCESS	0
#define SYSCTL_USER_ACCESS	1
#define SYSCTL_LEGACY		2

/* Event flag bits, one per access mode above; tested against hw_perf_event flags. */
#define PERF_EVENT_FLAG_NO_USER_ACCESS	BIT(SYSCTL_NO_USER_ACCESS)
#define PERF_EVENT_FLAG_USER_ACCESS	BIT(SYSCTL_USER_ACCESS)
#define PERF_EVENT_FLAG_LEGACY		BIT(SYSCTL_LEGACY)
34 | ||
26fabd6d NS |
35 | PMU_FORMAT_ATTR(event, "config:0-47"); |
36 | PMU_FORMAT_ATTR(firmware, "config:63"); | |
37 | ||
38 | static struct attribute *riscv_arch_formats_attr[] = { | |
39 | &format_attr_event.attr, | |
40 | &format_attr_firmware.attr, | |
41 | NULL, | |
42 | }; | |
43 | ||
44 | static struct attribute_group riscv_pmu_format_group = { | |
45 | .name = "format", | |
46 | .attrs = riscv_arch_formats_attr, | |
47 | }; | |
48 | ||
49 | static const struct attribute_group *riscv_pmu_attr_groups[] = { | |
50 | &riscv_pmu_format_group, | |
51 | NULL, | |
52 | }; | |
53 | ||
cc4c07c8 AG |
54 | /* Allow user mode access by default */ |
55 | static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS; | |
56 | ||
c7a9dcea | 57 | /* |
585e351f | 58 | * RISC-V doesn't have heterogeneous harts yet. This need to be part of |
e9991434 AP |
59 | * per_cpu in case of harts with different pmu counters |
60 | */ | |
61 | static union sbi_pmu_ctr_info *pmu_ctr_list; | |
65e9fb08 HS |
62 | static bool riscv_pmu_use_irq; |
63 | static unsigned int riscv_pmu_irq_num; | |
4905ec2f | 64 | static unsigned int riscv_pmu_irq; |
e9991434 | 65 | |
585e351f AP |
66 | /* Cache the available counters in a bitmask */ |
67 | static unsigned long cmask; | |
68 | ||
e9991434 AP |
/*
 * Encoding of an SBI PMU event index.
 *
 * The same 32-bit word can be viewed as a generic hardware event, as a
 * hardware cache event, or as the raw event_idx value that is handed to the
 * SBI implementation.
 */
struct sbi_pmu_event_data {
	union {
		union {
			/* Generic hardware event: code + type. */
			struct hw_gen_event {
				uint32_t event_code:16;
				uint32_t event_type:4;
				uint32_t reserved:12;
			} hw_gen_event;
			/* Cache event: result, operation and cache ids + type. */
			struct hw_cache_event {
				uint32_t result_id:1;
				uint32_t op_id:2;
				uint32_t cache_id:13;
				uint32_t event_type:4;
				uint32_t reserved:12;
			} hw_cache_event;
		};
		/* All of the fields above as a single value. */
		uint32_t event_idx;
	};
};
88 | ||
89 | static const struct sbi_pmu_event_data pmu_hw_event_map[] = { | |
90 | [PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = { | |
91 | SBI_PMU_HW_CPU_CYCLES, | |
92 | SBI_PMU_EVENT_TYPE_HW, 0}}, | |
93 | [PERF_COUNT_HW_INSTRUCTIONS] = {.hw_gen_event = { | |
94 | SBI_PMU_HW_INSTRUCTIONS, | |
95 | SBI_PMU_EVENT_TYPE_HW, 0}}, | |
96 | [PERF_COUNT_HW_CACHE_REFERENCES] = {.hw_gen_event = { | |
97 | SBI_PMU_HW_CACHE_REFERENCES, | |
98 | SBI_PMU_EVENT_TYPE_HW, 0}}, | |
99 | [PERF_COUNT_HW_CACHE_MISSES] = {.hw_gen_event = { | |
100 | SBI_PMU_HW_CACHE_MISSES, | |
101 | SBI_PMU_EVENT_TYPE_HW, 0}}, | |
102 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {.hw_gen_event = { | |
103 | SBI_PMU_HW_BRANCH_INSTRUCTIONS, | |
104 | SBI_PMU_EVENT_TYPE_HW, 0}}, | |
105 | [PERF_COUNT_HW_BRANCH_MISSES] = {.hw_gen_event = { | |
106 | SBI_PMU_HW_BRANCH_MISSES, | |
107 | SBI_PMU_EVENT_TYPE_HW, 0}}, | |
108 | [PERF_COUNT_HW_BUS_CYCLES] = {.hw_gen_event = { | |
109 | SBI_PMU_HW_BUS_CYCLES, | |
110 | SBI_PMU_EVENT_TYPE_HW, 0}}, | |
111 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {.hw_gen_event = { | |
112 | SBI_PMU_HW_STALLED_CYCLES_FRONTEND, | |
113 | SBI_PMU_EVENT_TYPE_HW, 0}}, | |
114 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {.hw_gen_event = { | |
115 | SBI_PMU_HW_STALLED_CYCLES_BACKEND, | |
116 | SBI_PMU_EVENT_TYPE_HW, 0}}, | |
117 | [PERF_COUNT_HW_REF_CPU_CYCLES] = {.hw_gen_event = { | |
118 | SBI_PMU_HW_REF_CPU_CYCLES, | |
119 | SBI_PMU_EVENT_TYPE_HW, 0}}, | |
120 | }; | |
121 | ||
122 | #define C(x) PERF_COUNT_HW_CACHE_##x | |
123 | static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] | |
124 | [PERF_COUNT_HW_CACHE_OP_MAX] | |
125 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | |
126 | [C(L1D)] = { | |
127 | [C(OP_READ)] = { | |
128 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
129 | C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
130 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
131 | C(OP_READ), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
132 | }, | |
133 | [C(OP_WRITE)] = { | |
134 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
135 | C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
136 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
137 | C(OP_WRITE), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
138 | }, | |
139 | [C(OP_PREFETCH)] = { | |
140 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
141 | C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
142 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
143 | C(OP_PREFETCH), C(L1D), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
144 | }, | |
145 | }, | |
146 | [C(L1I)] = { | |
147 | [C(OP_READ)] = { | |
148 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
149 | C(OP_READ), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
150 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), C(OP_READ), | |
151 | C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
152 | }, | |
153 | [C(OP_WRITE)] = { | |
154 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
155 | C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
156 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
157 | C(OP_WRITE), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
158 | }, | |
159 | [C(OP_PREFETCH)] = { | |
160 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
161 | C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
162 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
163 | C(OP_PREFETCH), C(L1I), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
164 | }, | |
165 | }, | |
166 | [C(LL)] = { | |
167 | [C(OP_READ)] = { | |
168 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
169 | C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
170 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
171 | C(OP_READ), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
172 | }, | |
173 | [C(OP_WRITE)] = { | |
174 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
175 | C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
176 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
177 | C(OP_WRITE), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
178 | }, | |
179 | [C(OP_PREFETCH)] = { | |
180 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
181 | C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
182 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
183 | C(OP_PREFETCH), C(LL), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
184 | }, | |
185 | }, | |
186 | [C(DTLB)] = { | |
187 | [C(OP_READ)] = { | |
188 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
189 | C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
190 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
191 | C(OP_READ), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
192 | }, | |
193 | [C(OP_WRITE)] = { | |
194 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
195 | C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
196 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
197 | C(OP_WRITE), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
198 | }, | |
199 | [C(OP_PREFETCH)] = { | |
200 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
201 | C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
202 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
203 | C(OP_PREFETCH), C(DTLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
204 | }, | |
205 | }, | |
206 | [C(ITLB)] = { | |
207 | [C(OP_READ)] = { | |
208 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
209 | C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
210 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
211 | C(OP_READ), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
212 | }, | |
213 | [C(OP_WRITE)] = { | |
214 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
215 | C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
216 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
217 | C(OP_WRITE), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
218 | }, | |
219 | [C(OP_PREFETCH)] = { | |
220 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
221 | C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
222 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
223 | C(OP_PREFETCH), C(ITLB), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
224 | }, | |
225 | }, | |
226 | [C(BPU)] = { | |
227 | [C(OP_READ)] = { | |
228 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
229 | C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
230 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
231 | C(OP_READ), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
232 | }, | |
233 | [C(OP_WRITE)] = { | |
234 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
235 | C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
236 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
237 | C(OP_WRITE), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
238 | }, | |
239 | [C(OP_PREFETCH)] = { | |
240 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
241 | C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
242 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
243 | C(OP_PREFETCH), C(BPU), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
244 | }, | |
245 | }, | |
246 | [C(NODE)] = { | |
247 | [C(OP_READ)] = { | |
248 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
249 | C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
250 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
251 | C(OP_READ), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
252 | }, | |
253 | [C(OP_WRITE)] = { | |
254 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
255 | C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
256 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
257 | C(OP_WRITE), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
258 | }, | |
259 | [C(OP_PREFETCH)] = { | |
260 | [C(RESULT_ACCESS)] = {.hw_cache_event = {C(RESULT_ACCESS), | |
261 | C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
262 | [C(RESULT_MISS)] = {.hw_cache_event = {C(RESULT_MISS), | |
263 | C(OP_PREFETCH), C(NODE), SBI_PMU_EVENT_TYPE_CACHE, 0}}, | |
264 | }, | |
265 | }, | |
266 | }; | |
267 | ||
268 | static int pmu_sbi_ctr_get_width(int idx) | |
269 | { | |
270 | return pmu_ctr_list[idx].width; | |
271 | } | |
272 | ||
273 | static bool pmu_sbi_ctr_is_fw(int cidx) | |
274 | { | |
275 | union sbi_pmu_ctr_info *info; | |
276 | ||
277 | info = &pmu_ctr_list[cidx]; | |
278 | if (!info) | |
279 | return false; | |
280 | ||
281 | return (info->type == SBI_PMU_CTR_TYPE_FW) ? true : false; | |
282 | } | |
283 | ||
585e351f AP |
284 | /* |
285 | * Returns the counter width of a programmable counter and number of hardware | |
286 | * counters. As we don't support heterogeneous CPUs yet, it is okay to just | |
287 | * return the counter width of the first programmable counter. | |
288 | */ | |
289 | int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr) | |
290 | { | |
291 | int i; | |
292 | union sbi_pmu_ctr_info *info; | |
293 | u32 hpm_width = 0, hpm_count = 0; | |
294 | ||
295 | if (!cmask) | |
296 | return -EINVAL; | |
297 | ||
298 | for_each_set_bit(i, &cmask, RISCV_MAX_COUNTERS) { | |
299 | info = &pmu_ctr_list[i]; | |
300 | if (!info) | |
301 | continue; | |
302 | if (!hpm_width && info->csr != CSR_CYCLE && info->csr != CSR_INSTRET) | |
303 | hpm_width = info->width; | |
304 | if (info->type == SBI_PMU_CTR_TYPE_HW) | |
305 | hpm_count++; | |
306 | } | |
307 | ||
308 | *hw_ctr_width = hpm_width; | |
309 | *num_hw_ctr = hpm_count; | |
310 | ||
311 | return 0; | |
312 | } | |
313 | EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info); | |
314 | ||
cc4c07c8 AG |
315 | static uint8_t pmu_sbi_csr_index(struct perf_event *event) |
316 | { | |
317 | return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE; | |
318 | } | |
319 | ||
8929283a AP |
320 | static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event) |
321 | { | |
322 | unsigned long cflags = 0; | |
323 | bool guest_events = false; | |
324 | ||
325 | if (event->attr.config1 & RISCV_PMU_CONFIG1_GUEST_EVENTS) | |
326 | guest_events = true; | |
327 | if (event->attr.exclude_kernel) | |
328 | cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VSINH : SBI_PMU_CFG_FLAG_SET_SINH; | |
329 | if (event->attr.exclude_user) | |
330 | cflags |= guest_events ? SBI_PMU_CFG_FLAG_SET_VUINH : SBI_PMU_CFG_FLAG_SET_UINH; | |
331 | if (guest_events && event->attr.exclude_hv) | |
332 | cflags |= SBI_PMU_CFG_FLAG_SET_SINH; | |
333 | if (event->attr.exclude_host) | |
334 | cflags |= SBI_PMU_CFG_FLAG_SET_UINH | SBI_PMU_CFG_FLAG_SET_SINH; | |
335 | if (event->attr.exclude_guest) | |
336 | cflags |= SBI_PMU_CFG_FLAG_SET_VSINH | SBI_PMU_CFG_FLAG_SET_VUINH; | |
337 | ||
338 | return cflags; | |
339 | } | |
340 | ||
e9991434 AP |
341 | static int pmu_sbi_ctr_get_idx(struct perf_event *event) |
342 | { | |
343 | struct hw_perf_event *hwc = &event->hw; | |
344 | struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); | |
345 | struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); | |
346 | struct sbiret ret; | |
347 | int idx; | |
cc4c07c8 | 348 | uint64_t cbase = 0, cmask = rvpmu->cmask; |
e9991434 AP |
349 | unsigned long cflags = 0; |
350 | ||
8929283a | 351 | cflags = pmu_sbi_get_filter_flags(event); |
cc4c07c8 AG |
352 | |
353 | /* | |
354 | * In legacy mode, we have to force the fixed counters for those events | |
355 | * but not in the user access mode as we want to use the other counters | |
356 | * that support sampling/filtering. | |
357 | */ | |
358 | if (hwc->flags & PERF_EVENT_FLAG_LEGACY) { | |
359 | if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) { | |
360 | cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH; | |
361 | cmask = 1; | |
362 | } else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) { | |
363 | cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH; | |
364 | cmask = 1UL << (CSR_INSTRET - CSR_CYCLE); | |
365 | } | |
366 | } | |
367 | ||
e9991434 | 368 | /* retrieve the available counter index */ |
0209b583 | 369 | #if defined(CONFIG_32BIT) |
1537bf26 | 370 | ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, |
cc4c07c8 | 371 | cmask, cflags, hwc->event_base, hwc->config, |
1537bf26 | 372 | hwc->config >> 32); |
0209b583 | 373 | #else |
1537bf26 | 374 | ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, |
cc4c07c8 | 375 | cmask, cflags, hwc->event_base, hwc->config, 0); |
0209b583 | 376 | #endif |
e9991434 AP |
377 | if (ret.error) { |
378 | pr_debug("Not able to find a counter for event %lx config %llx\n", | |
379 | hwc->event_base, hwc->config); | |
380 | return sbi_err_map_linux_errno(ret.error); | |
381 | } | |
382 | ||
383 | idx = ret.value; | |
1537bf26 | 384 | if (!test_bit(idx, &rvpmu->cmask) || !pmu_ctr_list[idx].value) |
e9991434 AP |
385 | return -ENOENT; |
386 | ||
387 | /* Additional sanity check for the counter id */ | |
388 | if (pmu_sbi_ctr_is_fw(idx)) { | |
389 | if (!test_and_set_bit(idx, cpuc->used_fw_ctrs)) | |
390 | return idx; | |
391 | } else { | |
392 | if (!test_and_set_bit(idx, cpuc->used_hw_ctrs)) | |
393 | return idx; | |
394 | } | |
395 | ||
396 | return -ENOENT; | |
397 | } | |
398 | ||
399 | static void pmu_sbi_ctr_clear_idx(struct perf_event *event) | |
400 | { | |
401 | ||
402 | struct hw_perf_event *hwc = &event->hw; | |
403 | struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); | |
404 | struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); | |
405 | int idx = hwc->idx; | |
406 | ||
407 | if (pmu_sbi_ctr_is_fw(idx)) | |
408 | clear_bit(idx, cpuc->used_fw_ctrs); | |
409 | else | |
410 | clear_bit(idx, cpuc->used_hw_ctrs); | |
411 | } | |
412 | ||
413 | static int pmu_event_find_cache(u64 config) | |
414 | { | |
415 | unsigned int cache_type, cache_op, cache_result, ret; | |
416 | ||
417 | cache_type = (config >> 0) & 0xff; | |
418 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | |
419 | return -EINVAL; | |
420 | ||
421 | cache_op = (config >> 8) & 0xff; | |
422 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) | |
423 | return -EINVAL; | |
424 | ||
425 | cache_result = (config >> 16) & 0xff; | |
426 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | |
427 | return -EINVAL; | |
428 | ||
429 | ret = pmu_cache_event_map[cache_type][cache_op][cache_result].event_idx; | |
430 | ||
431 | return ret; | |
432 | } | |
433 | ||
434 | static bool pmu_sbi_is_fw_event(struct perf_event *event) | |
435 | { | |
436 | u32 type = event->attr.type; | |
437 | u64 config = event->attr.config; | |
438 | ||
439 | if ((type == PERF_TYPE_RAW) && ((config >> 63) == 1)) | |
440 | return true; | |
441 | else | |
442 | return false; | |
443 | } | |
444 | ||
445 | static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) | |
446 | { | |
447 | u32 type = event->attr.type; | |
448 | u64 config = event->attr.config; | |
449 | int bSoftware; | |
450 | u64 raw_config_val; | |
451 | int ret; | |
452 | ||
453 | switch (type) { | |
454 | case PERF_TYPE_HARDWARE: | |
455 | if (config >= PERF_COUNT_HW_MAX) | |
456 | return -EINVAL; | |
457 | ret = pmu_hw_event_map[event->attr.config].event_idx; | |
458 | break; | |
459 | case PERF_TYPE_HW_CACHE: | |
460 | ret = pmu_event_find_cache(config); | |
461 | break; | |
462 | case PERF_TYPE_RAW: | |
463 | /* | |
464 | * As per SBI specification, the upper 16 bits must be unused for | |
465 | * a raw event. Use the MSB (63b) to distinguish between hardware | |
466 | * raw event and firmware events. | |
467 | */ | |
468 | bSoftware = config >> 63; | |
469 | raw_config_val = config & RISCV_PMU_RAW_EVENT_MASK; | |
470 | if (bSoftware) { | |
9f828bc3 MC |
471 | ret = (raw_config_val & 0xFFFF) | |
472 | (SBI_PMU_EVENT_TYPE_FW << 16); | |
e9991434 AP |
473 | } else { |
474 | ret = RISCV_PMU_RAW_EVENT_IDX; | |
475 | *econfig = raw_config_val; | |
476 | } | |
477 | break; | |
478 | default: | |
479 | ret = -EINVAL; | |
480 | break; | |
481 | } | |
482 | ||
483 | return ret; | |
484 | } | |
485 | ||
486 | static u64 pmu_sbi_ctr_read(struct perf_event *event) | |
487 | { | |
488 | struct hw_perf_event *hwc = &event->hw; | |
489 | int idx = hwc->idx; | |
490 | struct sbiret ret; | |
491 | union sbi_pmu_ctr_info info; | |
492 | u64 val = 0; | |
493 | ||
494 | if (pmu_sbi_is_fw_event(event)) { | |
495 | ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, | |
496 | hwc->idx, 0, 0, 0, 0, 0); | |
497 | if (!ret.error) | |
498 | val = ret.value; | |
499 | } else { | |
500 | info = pmu_ctr_list[idx]; | |
501 | val = riscv_pmu_ctr_read_csr(info.csr); | |
502 | if (IS_ENABLED(CONFIG_32BIT)) | |
503 | val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val; | |
504 | } | |
505 | ||
506 | return val; | |
507 | } | |
508 | ||
cc4c07c8 AG |
509 | static void pmu_sbi_set_scounteren(void *arg) |
510 | { | |
511 | struct perf_event *event = (struct perf_event *)arg; | |
512 | ||
3fec3233 AG |
513 | if (event->hw.idx != -1) |
514 | csr_write(CSR_SCOUNTEREN, | |
515 | csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event))); | |
cc4c07c8 AG |
516 | } |
517 | ||
518 | static void pmu_sbi_reset_scounteren(void *arg) | |
519 | { | |
520 | struct perf_event *event = (struct perf_event *)arg; | |
521 | ||
3fec3233 AG |
522 | if (event->hw.idx != -1) |
523 | csr_write(CSR_SCOUNTEREN, | |
524 | csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event))); | |
cc4c07c8 AG |
525 | } |
526 | ||
e9991434 AP |
527 | static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) |
528 | { | |
529 | struct sbiret ret; | |
530 | struct hw_perf_event *hwc = &event->hw; | |
531 | unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE; | |
532 | ||
0209b583 | 533 | #if defined(CONFIG_32BIT) |
e9991434 AP |
534 | ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx, |
535 | 1, flag, ival, ival >> 32, 0); | |
0209b583 AP |
536 | #else |
537 | ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx, | |
538 | 1, flag, ival, 0, 0); | |
539 | #endif | |
e9991434 AP |
540 | if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED)) |
541 | pr_err("Starting counter idx %d failed with error %d\n", | |
542 | hwc->idx, sbi_err_map_linux_errno(ret.error)); | |
cc4c07c8 AG |
543 | |
544 | if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && | |
545 | (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) | |
546 | pmu_sbi_set_scounteren((void *)event); | |
e9991434 AP |
547 | } |
548 | ||
549 | static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) | |
550 | { | |
551 | struct sbiret ret; | |
552 | struct hw_perf_event *hwc = &event->hw; | |
553 | ||
cc4c07c8 AG |
554 | if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && |
555 | (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) | |
556 | pmu_sbi_reset_scounteren((void *)event); | |
557 | ||
e9991434 AP |
558 | ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0); |
559 | if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && | |
560 | flag != SBI_PMU_STOP_FLAG_RESET) | |
561 | pr_err("Stopping counter idx %d failed with error %d\n", | |
562 | hwc->idx, sbi_err_map_linux_errno(ret.error)); | |
563 | } | |
564 | ||
565 | static int pmu_sbi_find_num_ctrs(void) | |
566 | { | |
567 | struct sbiret ret; | |
568 | ||
569 | ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0); | |
570 | if (!ret.error) | |
571 | return ret.value; | |
572 | else | |
573 | return sbi_err_map_linux_errno(ret.error); | |
574 | } | |
575 | ||
1537bf26 | 576 | static int pmu_sbi_get_ctrinfo(int nctr, unsigned long *mask) |
e9991434 AP |
577 | { |
578 | struct sbiret ret; | |
579 | int i, num_hw_ctr = 0, num_fw_ctr = 0; | |
580 | union sbi_pmu_ctr_info cinfo; | |
581 | ||
582 | pmu_ctr_list = kcalloc(nctr, sizeof(*pmu_ctr_list), GFP_KERNEL); | |
583 | if (!pmu_ctr_list) | |
584 | return -ENOMEM; | |
585 | ||
20e0fbab | 586 | for (i = 0; i < nctr; i++) { |
e9991434 AP |
587 | ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0); |
588 | if (ret.error) | |
589 | /* The logical counter ids are not expected to be contiguous */ | |
590 | continue; | |
1537bf26 SM |
591 | |
592 | *mask |= BIT(i); | |
593 | ||
e9991434 AP |
594 | cinfo.value = ret.value; |
595 | if (cinfo.type == SBI_PMU_CTR_TYPE_FW) | |
596 | num_fw_ctr++; | |
597 | else | |
598 | num_hw_ctr++; | |
599 | pmu_ctr_list[i].value = cinfo.value; | |
600 | } | |
601 | ||
602 | pr_info("%d firmware and %d hardware counters\n", num_fw_ctr, num_hw_ctr); | |
603 | ||
604 | return 0; | |
605 | } | |
606 | ||
4905ec2f AP |
607 | static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu) |
608 | { | |
c7a9dcea | 609 | /* |
4905ec2f AP |
610 | * No need to check the error because we are disabling all the counters |
611 | * which may include counters that are not enabled yet. | |
612 | */ | |
613 | sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, | |
1537bf26 | 614 | 0, pmu->cmask, 0, 0, 0, 0); |
4905ec2f AP |
615 | } |
616 | ||
617 | static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu) | |
618 | { | |
619 | struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); | |
620 | ||
621 | /* No need to check the error here as we can't do anything about the error */ | |
622 | sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0, | |
623 | cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0); | |
624 | } | |
625 | ||
c7a9dcea | 626 | /* |
4905ec2f AP |
627 | * This function starts all the used counters in two step approach. |
628 | * Any counter that did not overflow can be start in a single step | |
629 | * while the overflowed counters need to be started with updated initialization | |
630 | * value. | |
631 | */ | |
632 | static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu, | |
633 | unsigned long ctr_ovf_mask) | |
634 | { | |
635 | int idx = 0; | |
636 | struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); | |
637 | struct perf_event *event; | |
638 | unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE; | |
639 | unsigned long ctr_start_mask = 0; | |
640 | uint64_t max_period; | |
641 | struct hw_perf_event *hwc; | |
642 | u64 init_val = 0; | |
643 | ||
644 | ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask; | |
645 | ||
646 | /* Start all the counters that did not overflow in a single shot */ | |
647 | sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask, | |
648 | 0, 0, 0, 0); | |
649 | ||
650 | /* Reinitialize and start all the counter that overflowed */ | |
651 | while (ctr_ovf_mask) { | |
652 | if (ctr_ovf_mask & 0x01) { | |
653 | event = cpu_hw_evt->events[idx]; | |
654 | hwc = &event->hw; | |
655 | max_period = riscv_pmu_ctr_get_width_mask(event); | |
656 | init_val = local64_read(&hwc->prev_count) & max_period; | |
acc1b919 AP |
657 | #if defined(CONFIG_32BIT) |
658 | sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1, | |
659 | flag, init_val, init_val >> 32, 0); | |
660 | #else | |
4905ec2f AP |
661 | sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1, |
662 | flag, init_val, 0, 0); | |
acc1b919 | 663 | #endif |
133a6d1f | 664 | perf_event_update_userpage(event); |
4905ec2f AP |
665 | } |
666 | ctr_ovf_mask = ctr_ovf_mask >> 1; | |
667 | idx++; | |
668 | } | |
669 | } | |
670 | ||
671 | static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) | |
672 | { | |
673 | struct perf_sample_data data; | |
674 | struct pt_regs *regs; | |
675 | struct hw_perf_event *hw_evt; | |
676 | union sbi_pmu_ctr_info *info; | |
677 | int lidx, hidx, fidx; | |
678 | struct riscv_pmu *pmu; | |
679 | struct perf_event *event; | |
680 | unsigned long overflow; | |
681 | unsigned long overflowed_ctrs = 0; | |
682 | struct cpu_hw_events *cpu_hw_evt = dev; | |
096b52fd | 683 | u64 start_clock = sched_clock(); |
4905ec2f AP |
684 | |
685 | if (WARN_ON_ONCE(!cpu_hw_evt)) | |
686 | return IRQ_NONE; | |
687 | ||
688 | /* Firmware counter don't support overflow yet */ | |
689 | fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); | |
c6e316ac AG |
690 | if (fidx == RISCV_MAX_COUNTERS) { |
691 | csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); | |
692 | return IRQ_NONE; | |
693 | } | |
694 | ||
4905ec2f AP |
695 | event = cpu_hw_evt->events[fidx]; |
696 | if (!event) { | |
65e9fb08 | 697 | csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); |
4905ec2f AP |
698 | return IRQ_NONE; |
699 | } | |
700 | ||
701 | pmu = to_riscv_pmu(event->pmu); | |
702 | pmu_sbi_stop_hw_ctrs(pmu); | |
703 | ||
704 | /* Overflow status register should only be read after counter are stopped */ | |
65e9fb08 | 705 | ALT_SBI_PMU_OVERFLOW(overflow); |
4905ec2f | 706 | |
c7a9dcea | 707 | /* |
4905ec2f AP |
708 | * Overflow interrupt pending bit should only be cleared after stopping |
709 | * all the counters to avoid any race condition. | |
710 | */ | |
65e9fb08 | 711 | csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); |
4905ec2f AP |
712 | |
713 | /* No overflow bit is set */ | |
714 | if (!overflow) | |
715 | return IRQ_NONE; | |
716 | ||
717 | regs = get_irq_regs(); | |
718 | ||
719 | for_each_set_bit(lidx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) { | |
720 | struct perf_event *event = cpu_hw_evt->events[lidx]; | |
721 | ||
722 | /* Skip if invalid event or user did not request a sampling */ | |
723 | if (!event || !is_sampling_event(event)) | |
724 | continue; | |
725 | ||
726 | info = &pmu_ctr_list[lidx]; | |
727 | /* Do a sanity check */ | |
728 | if (!info || info->type != SBI_PMU_CTR_TYPE_HW) | |
729 | continue; | |
730 | ||
731 | /* compute hardware counter index */ | |
732 | hidx = info->csr - CSR_CYCLE; | |
733 | /* check if the corresponding bit is set in sscountovf */ | |
734 | if (!(overflow & (1 << hidx))) | |
735 | continue; | |
736 | ||
737 | /* | |
738 | * Keep a track of overflowed counters so that they can be started | |
739 | * with updated initial value. | |
740 | */ | |
741 | overflowed_ctrs |= 1 << lidx; | |
742 | hw_evt = &event->hw; | |
743 | riscv_pmu_event_update(event); | |
744 | perf_sample_data_init(&data, 0, hw_evt->last_period); | |
745 | if (riscv_pmu_event_set_period(event)) { | |
746 | /* | |
747 | * Unlike other ISAs, RISC-V don't have to disable interrupts | |
748 | * to avoid throttling here. As per the specification, the | |
749 | * interrupt remains disabled until the OF bit is set. | |
750 | * Interrupts are enabled again only during the start. | |
751 | * TODO: We will need to stop the guest counters once | |
752 | * virtualization support is added. | |
753 | */ | |
754 | perf_event_overflow(event, &data, regs); | |
755 | } | |
756 | } | |
096b52fd | 757 | |
4905ec2f | 758 | pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs); |
096b52fd | 759 | perf_sample_event_took(sched_clock() - start_clock); |
4905ec2f AP |
760 | |
761 | return IRQ_HANDLED; | |
762 | } | |
763 | ||
e9991434 AP |
764 | static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) |
765 | { | |
766 | struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node); | |
4905ec2f | 767 | struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events); |
e9991434 | 768 | |
5a5294fb | 769 | /* |
cc4c07c8 AG |
770 | * We keep enabling userspace access to CYCLE, TIME and INSTRET via the |
771 | * legacy option but that will be removed in the future. | |
5a5294fb | 772 | */ |
cc4c07c8 AG |
773 | if (sysctl_perf_user_access == SYSCTL_LEGACY) |
774 | csr_write(CSR_SCOUNTEREN, 0x7); | |
775 | else | |
776 | csr_write(CSR_SCOUNTEREN, 0x2); | |
e9991434 AP |
777 | |
778 | /* Stop all the counters so that they can be enabled from perf */ | |
4905ec2f AP |
779 | pmu_sbi_stop_all(pmu); |
780 | ||
65e9fb08 | 781 | if (riscv_pmu_use_irq) { |
4905ec2f | 782 | cpu_hw_evt->irq = riscv_pmu_irq; |
65e9fb08 HS |
783 | csr_clear(CSR_IP, BIT(riscv_pmu_irq_num)); |
784 | csr_set(CSR_IE, BIT(riscv_pmu_irq_num)); | |
4905ec2f AP |
785 | enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE); |
786 | } | |
e9991434 AP |
787 | |
788 | return 0; | |
789 | } | |
790 | ||
791 | static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node) | |
792 | { | |
65e9fb08 | 793 | if (riscv_pmu_use_irq) { |
4905ec2f | 794 | disable_percpu_irq(riscv_pmu_irq); |
65e9fb08 | 795 | csr_clear(CSR_IE, BIT(riscv_pmu_irq_num)); |
4905ec2f AP |
796 | } |
797 | ||
e9991434 AP |
798 | /* Disable all counters access for user mode now */ |
799 | csr_write(CSR_SCOUNTEREN, 0x0); | |
800 | ||
801 | return 0; | |
802 | } | |
803 | ||
4905ec2f AP |
804 | static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev) |
805 | { | |
806 | int ret; | |
807 | struct cpu_hw_events __percpu *hw_events = pmu->hw_events; | |
4905ec2f AP |
808 | struct irq_domain *domain = NULL; |
809 | ||
65e9fb08 HS |
810 | if (riscv_isa_extension_available(NULL, SSCOFPMF)) { |
811 | riscv_pmu_irq_num = RV_IRQ_PMU; | |
812 | riscv_pmu_use_irq = true; | |
813 | } else if (IS_ENABLED(CONFIG_ERRATA_THEAD_PMU) && | |
814 | riscv_cached_mvendorid(0) == THEAD_VENDOR_ID && | |
815 | riscv_cached_marchid(0) == 0 && | |
816 | riscv_cached_mimpid(0) == 0) { | |
817 | riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU; | |
818 | riscv_pmu_use_irq = true; | |
819 | } | |
820 | ||
821 | if (!riscv_pmu_use_irq) | |
4905ec2f AP |
822 | return -EOPNOTSUPP; |
823 | ||
ca7473cb S |
824 | domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), |
825 | DOMAIN_BUS_ANY); | |
4905ec2f AP |
826 | if (!domain) { |
827 | pr_err("Failed to find INTC IRQ root domain\n"); | |
828 | return -ENODEV; | |
829 | } | |
830 | ||
65e9fb08 | 831 | riscv_pmu_irq = irq_create_mapping(domain, riscv_pmu_irq_num); |
4905ec2f AP |
832 | if (!riscv_pmu_irq) { |
833 | pr_err("Failed to map PMU interrupt for node\n"); | |
834 | return -ENODEV; | |
835 | } | |
836 | ||
837 | ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events); | |
838 | if (ret) { | |
839 | pr_err("registering percpu irq failed [%d]\n", ret); | |
840 | return ret; | |
841 | } | |
842 | ||
843 | return 0; | |
844 | } | |
845 | ||
e9a023f2 EL |
846 | #ifdef CONFIG_CPU_PM |
847 | static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, | |
848 | void *v) | |
849 | { | |
850 | struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb); | |
851 | struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); | |
852 | int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS); | |
853 | struct perf_event *event; | |
854 | int idx; | |
855 | ||
856 | if (!enabled) | |
857 | return NOTIFY_OK; | |
858 | ||
859 | for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) { | |
860 | event = cpuc->events[idx]; | |
861 | if (!event) | |
862 | continue; | |
863 | ||
864 | switch (cmd) { | |
865 | case CPU_PM_ENTER: | |
866 | /* | |
867 | * Stop and update the counter | |
868 | */ | |
869 | riscv_pmu_stop(event, PERF_EF_UPDATE); | |
870 | break; | |
871 | case CPU_PM_EXIT: | |
872 | case CPU_PM_ENTER_FAILED: | |
873 | /* | |
874 | * Restore and enable the counter. | |
e9a023f2 | 875 | */ |
1c38b061 | 876 | riscv_pmu_start(event, PERF_EF_RELOAD); |
e9a023f2 EL |
877 | break; |
878 | default: | |
879 | break; | |
880 | } | |
881 | } | |
882 | ||
883 | return NOTIFY_OK; | |
884 | } | |
885 | ||
886 | static int riscv_pm_pmu_register(struct riscv_pmu *pmu) | |
887 | { | |
888 | pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify; | |
889 | return cpu_pm_register_notifier(&pmu->riscv_pm_nb); | |
890 | } | |
891 | ||
892 | static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) | |
893 | { | |
894 | cpu_pm_unregister_notifier(&pmu->riscv_pm_nb); | |
895 | } | |
896 | #else | |
/* !CONFIG_CPU_PM: no notifier to register, report success. */
static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu)
{
	return 0;
}
/* !CONFIG_CPU_PM: nothing was registered, so nothing to undo. */
static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu)
{
}
899 | #endif | |
900 | ||
901 | static void riscv_pmu_destroy(struct riscv_pmu *pmu) | |
902 | { | |
903 | riscv_pm_pmu_unregister(pmu); | |
904 | cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); | |
905 | } | |
906 | ||
cc4c07c8 AG |
907 | static void pmu_sbi_event_init(struct perf_event *event) |
908 | { | |
909 | /* | |
910 | * The permissions are set at event_init so that we do not depend | |
911 | * on the sysctl value that can change. | |
912 | */ | |
913 | if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS) | |
914 | event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS; | |
915 | else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS) | |
916 | event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS; | |
917 | else | |
918 | event->hw.flags |= PERF_EVENT_FLAG_LEGACY; | |
919 | } | |
920 | ||
921 | static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm) | |
922 | { | |
923 | if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS) | |
924 | return; | |
925 | ||
926 | if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) { | |
927 | if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES && | |
928 | event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) { | |
929 | return; | |
930 | } | |
931 | } | |
932 | ||
933 | /* | |
934 | * The user mmapped the event to directly access it: this is where | |
935 | * we determine based on sysctl_perf_user_access if we grant userspace | |
936 | * the direct access to this event. That means that within the same | |
937 | * task, some events may be directly accessible and some other may not, | |
938 | * if the user changes the value of sysctl_perf_user_accesss in the | |
939 | * meantime. | |
940 | */ | |
941 | ||
942 | event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT; | |
943 | ||
944 | /* | |
945 | * We must enable userspace access *before* advertising in the user page | |
946 | * that it is possible to do so to avoid any race. | |
947 | * And we must notify all cpus here because threads that currently run | |
948 | * on other cpus will try to directly access the counter too without | |
949 | * calling pmu_sbi_ctr_start. | |
950 | */ | |
951 | if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS) | |
952 | on_each_cpu_mask(mm_cpumask(mm), | |
953 | pmu_sbi_set_scounteren, (void *)event, 1); | |
954 | } | |
955 | ||
956 | static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm) | |
957 | { | |
958 | if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS) | |
959 | return; | |
960 | ||
961 | if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) { | |
962 | if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES && | |
963 | event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) { | |
964 | return; | |
965 | } | |
966 | } | |
967 | ||
968 | /* | |
969 | * Here we can directly remove user access since the user does not have | |
970 | * access to the user page anymore so we avoid the racy window where the | |
971 | * user could have read cap_user_rdpmc to true right before we disable | |
972 | * it. | |
973 | */ | |
974 | event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT; | |
975 | ||
976 | if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS) | |
977 | on_each_cpu_mask(mm_cpumask(mm), | |
978 | pmu_sbi_reset_scounteren, (void *)event, 1); | |
979 | } | |
980 | ||
981 | static void riscv_pmu_update_counter_access(void *info) | |
982 | { | |
983 | if (sysctl_perf_user_access == SYSCTL_LEGACY) | |
984 | csr_write(CSR_SCOUNTEREN, 0x7); | |
985 | else | |
986 | csr_write(CSR_SCOUNTEREN, 0x2); | |
987 | } | |
988 | ||
989 | static int riscv_pmu_proc_user_access_handler(struct ctl_table *table, | |
990 | int write, void *buffer, | |
991 | size_t *lenp, loff_t *ppos) | |
992 | { | |
993 | int prev = sysctl_perf_user_access; | |
994 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | |
995 | ||
996 | /* | |
997 | * Test against the previous value since we clear SCOUNTEREN when | |
998 | * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should | |
999 | * not do that if that was already the case. | |
1000 | */ | |
1001 | if (ret || !write || prev == sysctl_perf_user_access) | |
1002 | return ret; | |
1003 | ||
1004 | on_each_cpu(riscv_pmu_update_counter_access, NULL, 1); | |
1005 | ||
1006 | return 0; | |
1007 | } | |
1008 | ||
1009 | static struct ctl_table sbi_pmu_sysctl_table[] = { | |
1010 | { | |
1011 | .procname = "perf_user_access", | |
1012 | .data = &sysctl_perf_user_access, | |
1013 | .maxlen = sizeof(unsigned int), | |
1014 | .mode = 0644, | |
1015 | .proc_handler = riscv_pmu_proc_user_access_handler, | |
1016 | .extra1 = SYSCTL_ZERO, | |
1017 | .extra2 = SYSCTL_TWO, | |
1018 | }, | |
1019 | { } | |
1020 | }; | |
1021 | ||
e9991434 AP |
1022 | static int pmu_sbi_device_probe(struct platform_device *pdev) |
1023 | { | |
1024 | struct riscv_pmu *pmu = NULL; | |
4905ec2f | 1025 | int ret = -ENODEV; |
1537bf26 | 1026 | int num_counters; |
e9991434 AP |
1027 | |
1028 | pr_info("SBI PMU extension is available\n"); | |
1029 | pmu = riscv_pmu_alloc(); | |
1030 | if (!pmu) | |
1031 | return -ENOMEM; | |
1032 | ||
1033 | num_counters = pmu_sbi_find_num_ctrs(); | |
1034 | if (num_counters < 0) { | |
1035 | pr_err("SBI PMU extension doesn't provide any counters\n"); | |
4905ec2f | 1036 | goto out_free; |
e9991434 | 1037 | } |
ee95b88d VM |
1038 | |
1039 | /* It is possible to get from SBI more than max number of counters */ | |
1040 | if (num_counters > RISCV_MAX_COUNTERS) { | |
1041 | num_counters = RISCV_MAX_COUNTERS; | |
1042 | pr_info("SBI returned more than maximum number of counters. Limiting the number of counters to %d\n", num_counters); | |
1043 | } | |
e9991434 AP |
1044 | |
1045 | /* cache all the information about counters now */ | |
1537bf26 | 1046 | if (pmu_sbi_get_ctrinfo(num_counters, &cmask)) |
4905ec2f | 1047 | goto out_free; |
e9991434 | 1048 | |
4905ec2f AP |
1049 | ret = pmu_sbi_setup_irqs(pmu, pdev); |
1050 | if (ret < 0) { | |
1051 | pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n"); | |
1052 | pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; | |
1053 | pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; | |
1054 | } | |
1537bf26 | 1055 | |
26fabd6d | 1056 | pmu->pmu.attr_groups = riscv_pmu_attr_groups; |
1537bf26 | 1057 | pmu->cmask = cmask; |
e9991434 AP |
1058 | pmu->ctr_start = pmu_sbi_ctr_start; |
1059 | pmu->ctr_stop = pmu_sbi_ctr_stop; | |
1060 | pmu->event_map = pmu_sbi_event_map; | |
1061 | pmu->ctr_get_idx = pmu_sbi_ctr_get_idx; | |
1062 | pmu->ctr_get_width = pmu_sbi_ctr_get_width; | |
1063 | pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx; | |
1064 | pmu->ctr_read = pmu_sbi_ctr_read; | |
cc4c07c8 AG |
1065 | pmu->event_init = pmu_sbi_event_init; |
1066 | pmu->event_mapped = pmu_sbi_event_mapped; | |
1067 | pmu->event_unmapped = pmu_sbi_event_unmapped; | |
1068 | pmu->csr_index = pmu_sbi_csr_index; | |
e9991434 AP |
1069 | |
1070 | ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); | |
1071 | if (ret) | |
1072 | return ret; | |
1073 | ||
e9a023f2 EL |
1074 | ret = riscv_pm_pmu_register(pmu); |
1075 | if (ret) | |
1076 | goto out_unregister; | |
1077 | ||
e9991434 | 1078 | ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); |
e9a023f2 EL |
1079 | if (ret) |
1080 | goto out_unregister; | |
e9991434 | 1081 | |
cc4c07c8 AG |
1082 | register_sysctl("kernel", sbi_pmu_sysctl_table); |
1083 | ||
e9991434 | 1084 | return 0; |
4905ec2f | 1085 | |
e9a023f2 EL |
1086 | out_unregister: |
1087 | riscv_pmu_destroy(pmu); | |
1088 | ||
4905ec2f AP |
1089 | out_free: |
1090 | kfree(pmu); | |
1091 | return ret; | |
e9991434 AP |
1092 | } |
1093 | ||
1094 | static struct platform_driver pmu_sbi_driver = { | |
1095 | .probe = pmu_sbi_device_probe, | |
1096 | .driver = { | |
d5ac062d | 1097 | .name = RISCV_PMU_SBI_PDEV_NAME, |
e9991434 AP |
1098 | }, |
1099 | }; | |
1100 | ||
1101 | static int __init pmu_sbi_devinit(void) | |
1102 | { | |
1103 | int ret; | |
1104 | struct platform_device *pdev; | |
1105 | ||
1106 | if (sbi_spec_version < sbi_mk_version(0, 3) || | |
41cad828 | 1107 | !sbi_probe_extension(SBI_EXT_PMU)) { |
e9991434 AP |
1108 | return 0; |
1109 | } | |
1110 | ||
1111 | ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING, | |
1112 | "perf/riscv/pmu:starting", | |
1113 | pmu_sbi_starting_cpu, pmu_sbi_dying_cpu); | |
1114 | if (ret) { | |
1115 | pr_err("CPU hotplug notifier could not be registered: %d\n", | |
1116 | ret); | |
1117 | return ret; | |
1118 | } | |
1119 | ||
1120 | ret = platform_driver_register(&pmu_sbi_driver); | |
1121 | if (ret) | |
1122 | return ret; | |
1123 | ||
d5ac062d | 1124 | pdev = platform_device_register_simple(RISCV_PMU_SBI_PDEV_NAME, -1, NULL, 0); |
e9991434 AP |
1125 | if (IS_ERR(pdev)) { |
1126 | platform_driver_unregister(&pmu_sbi_driver); | |
1127 | return PTR_ERR(pdev); | |
1128 | } | |
1129 | ||
1130 | /* Notify legacy implementation that SBI pmu is available*/ | |
1131 | riscv_pmu_legacy_skip_init(); | |
1132 | ||
1133 | return ret; | |
1134 | } | |
1135 | device_initcall(pmu_sbi_devinit) |