Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
0e93a6ed CS |
2 | /* |
3 | * Hypervisor supplied "24x7" performance counter support | |
4 | * | |
5 | * Author: Cody P Schafer <cody@linux.vnet.ibm.com> | |
6 | * Copyright 2014 IBM Corporation. | |
0e93a6ed CS |
7 | */ |
8 | ||
9 | #define pr_fmt(fmt) "hv-24x7: " fmt | |
10 | ||
11 | #include <linux/perf_event.h> | |
5c5cd7b5 | 12 | #include <linux/rbtree.h> |
0e93a6ed CS |
13 | #include <linux/module.h> |
14 | #include <linux/slab.h> | |
5c5cd7b5 CS |
15 | #include <linux/vmalloc.h> |
16 | ||
2e6553aa | 17 | #include <asm/cputhreads.h> |
0e93a6ed CS |
18 | #include <asm/firmware.h> |
19 | #include <asm/hvcall.h> | |
20 | #include <asm/io.h> | |
5c5cd7b5 | 21 | #include <linux/byteorder/generic.h> |
0e93a6ed | 22 | |
8ba21426 | 23 | #include <asm/rtas.h> |
0e93a6ed CS |
24 | #include "hv-24x7.h" |
25 | #include "hv-24x7-catalog.h" | |
26 | #include "hv-common.h" | |
27 | ||
2e6553aa TJB |
28 | /* Version of the 24x7 hypervisor API that we should use in this machine. */ |
29 | static int interface_version; | |
30 | ||
bfaa7834 TJB |
31 | /* Whether we have to aggregate result data for some domains. */ |
32 | static bool aggregate_result_elements; | |
33 | ||
1a8f0886 KJ |
34 | static cpumask_t hv_24x7_cpumask; |
35 | ||
76c452b4 | 36 | static bool domain_is_valid(unsigned int domain) |
5c5cd7b5 CS |
37 | { |
38 | switch (domain) { | |
39 | #define DOMAIN(n, v, x, c) \ | |
40 | case HV_PERF_DOMAIN_##n: \ | |
41 | /* fall through */ | |
42 | #include "hv-24x7-domains.h" | |
43 | #undef DOMAIN | |
44 | return true; | |
45 | default: | |
46 | return false; | |
47 | } | |
48 | } | |
49 | ||
76c452b4 | 50 | static bool is_physical_domain(unsigned int domain) |
5c5cd7b5 CS |
51 | { |
52 | switch (domain) { | |
53 | #define DOMAIN(n, v, x, c) \ | |
54 | case HV_PERF_DOMAIN_##n: \ | |
55 | return c; | |
56 | #include "hv-24x7-domains.h" | |
57 | #undef DOMAIN | |
58 | default: | |
59 | return false; | |
60 | } | |
61 | } | |
62 | ||
8ba21426 KJ |
63 | /* |
64 | * The Processor Module Information system parameter allows transferring | |
65 | * of certain processor module information from the platform to the OS. | |
66 | * Refer PAPR+ document to get parameter token value as '43'. | |
67 | */ | |
68 | ||
69 | #define PROCESSOR_MODULE_INFO 43 | |
70 | ||
71 | static u32 phys_sockets; /* Physical sockets */ | |
72 | static u32 phys_chipspersocket; /* Physical chips per socket*/ | |
73 | static u32 phys_coresperchip; /* Physical cores per chip */ | |
74 | ||
75 | /* | |
76 | * read_24x7_sys_info() | |
77 | * Retrieve the number of sockets and chips per socket and cores per | |
78 | * chip details through the get-system-parameter rtas call. | |
79 | */ | |
80 | void read_24x7_sys_info(void) | |
81 | { | |
82 | int call_status, len, ntypes; | |
83 | ||
84 | spin_lock(&rtas_data_buf_lock); | |
85 | ||
86 | /* | |
87 | * Making system parameter: chips and sockets and cores per chip | |
88 | * default to 1. | |
89 | */ | |
90 | phys_sockets = 1; | |
91 | phys_chipspersocket = 1; | |
92 | phys_coresperchip = 1; | |
93 | ||
94 | call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, | |
95 | NULL, | |
96 | PROCESSOR_MODULE_INFO, | |
97 | __pa(rtas_data_buf), | |
98 | RTAS_DATA_BUF_SIZE); | |
99 | ||
100 | if (call_status != 0) { | |
101 | pr_err("Error calling get-system-parameter %d\n", | |
102 | call_status); | |
103 | } else { | |
104 | len = be16_to_cpup((__be16 *)&rtas_data_buf[0]); | |
105 | if (len < 8) | |
106 | goto out; | |
107 | ||
108 | ntypes = be16_to_cpup((__be16 *)&rtas_data_buf[2]); | |
109 | ||
110 | if (!ntypes) | |
111 | goto out; | |
112 | ||
113 | phys_sockets = be16_to_cpup((__be16 *)&rtas_data_buf[4]); | |
114 | phys_chipspersocket = be16_to_cpup((__be16 *)&rtas_data_buf[6]); | |
115 | phys_coresperchip = be16_to_cpup((__be16 *)&rtas_data_buf[8]); | |
116 | } | |
117 | ||
118 | out: | |
119 | spin_unlock(&rtas_data_buf_lock); | |
120 | } | |
121 | ||
bfaa7834 TJB |
122 | /* Domains for which more than one result element are returned for each event. */ |
123 | static bool domain_needs_aggregation(unsigned int domain) | |
124 | { | |
125 | return aggregate_result_elements && | |
126 | (domain == HV_PERF_DOMAIN_PHYS_CORE || | |
127 | (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE && | |
128 | domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE)); | |
129 | } | |
130 | ||
76c452b4 | 131 | static const char *domain_name(unsigned int domain) |
d34171e8 SB |
132 | { |
133 | if (!domain_is_valid(domain)) | |
134 | return NULL; | |
135 | ||
136 | switch (domain) { | |
137 | case HV_PERF_DOMAIN_PHYS_CHIP: return "Physical Chip"; | |
138 | case HV_PERF_DOMAIN_PHYS_CORE: return "Physical Core"; | |
139 | case HV_PERF_DOMAIN_VCPU_HOME_CORE: return "VCPU Home Core"; | |
140 | case HV_PERF_DOMAIN_VCPU_HOME_CHIP: return "VCPU Home Chip"; | |
141 | case HV_PERF_DOMAIN_VCPU_HOME_NODE: return "VCPU Home Node"; | |
142 | case HV_PERF_DOMAIN_VCPU_REMOTE_NODE: return "VCPU Remote Node"; | |
143 | } | |
144 | ||
145 | WARN_ON_ONCE(domain); | |
146 | return NULL; | |
147 | } | |
148 | ||
76c452b4 | 149 | static bool catalog_entry_domain_is_valid(unsigned int domain) |
5c5cd7b5 | 150 | { |
2e6553aa TJB |
151 | /* POWER8 doesn't support virtual domains. */ |
152 | if (interface_version == 1) | |
153 | return is_physical_domain(domain); | |
154 | else | |
155 | return domain_is_valid(domain); | |
5c5cd7b5 CS |
156 | } |
157 | ||
0e93a6ed CS |
158 | /* |
159 | * TODO: Merging events: | |
160 | * - Think of the hcall as an interface to a 4d array of counters: | |
161 | * - x = domains | |
162 | * - y = indexes in the domain (core, chip, vcpu, node, etc) | |
163 | * - z = offset into the counter space | |
164 | * - w = lpars (guest vms, "logical partitions") | |
165 | * - A single request is: x,y,y_last,z,z_last,w,w_last | |
166 | * - this means we can retrieve a rectangle of counters in y,z for a single x. | |
167 | * | |
168 | * - Things to consider (ignoring w): | |
169 | * - input cost_per_request = 16 | |
170 | * - output cost_per_result(ys,zs) = 8 + 8 * ys + ys * zs | |
171 | * - limited number of requests per hcall (must fit into 4K bytes) | |
172 | * - 4k = 16 [buffer header] + 16 [request size] * request_count | |
173 | * - 255 requests per hcall | |
174 | * - sometimes it will be more efficient to read extra data and discard | |
175 | */ | |
176 | ||
177 | /* | |
178 | * Example usage: | |
5c5cd7b5 | 179 | * perf stat -e 'hv_24x7/domain=2,offset=8,vcpu=0,lpar=0xffffffff/' |
0e93a6ed CS |
180 | */ |
181 | ||
182 | /* u3 0-6, one of HV_24X7_PERF_DOMAIN */ | |
183 | EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3); | |
184 | /* u16 */ | |
5c5cd7b5 | 185 | EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31); |
e5a5886d | 186 | EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31); |
5c5cd7b5 | 187 | EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31); |
0e93a6ed CS |
188 | /* u32, see "data_offset" */ |
189 | EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63); | |
190 | /* u16 */ | |
191 | EVENT_DEFINE_RANGE_FORMAT(lpar, config1, 0, 15); | |
192 | ||
193 | EVENT_DEFINE_RANGE(reserved1, config, 4, 15); | |
194 | EVENT_DEFINE_RANGE(reserved2, config1, 16, 63); | |
195 | EVENT_DEFINE_RANGE(reserved3, config2, 0, 63); | |
196 | ||
197 | static struct attribute *format_attrs[] = { | |
198 | &format_attr_domain.attr, | |
199 | &format_attr_offset.attr, | |
5c5cd7b5 | 200 | &format_attr_core.attr, |
e5a5886d | 201 | &format_attr_chip.attr, |
5c5cd7b5 | 202 | &format_attr_vcpu.attr, |
0e93a6ed CS |
203 | &format_attr_lpar.attr, |
204 | NULL, | |
205 | }; | |
206 | ||
6b3a3e12 | 207 | static const struct attribute_group format_group = { |
0e93a6ed CS |
208 | .name = "format", |
209 | .attrs = format_attrs, | |
210 | }; | |
211 | ||
5c5cd7b5 CS |
212 | static struct attribute_group event_group = { |
213 | .name = "events", | |
214 | /* .attrs is set in init */ | |
215 | }; | |
216 | ||
217 | static struct attribute_group event_desc_group = { | |
218 | .name = "event_descs", | |
219 | /* .attrs is set in init */ | |
220 | }; | |
221 | ||
222 | static struct attribute_group event_long_desc_group = { | |
223 | .name = "event_long_descs", | |
224 | /* .attrs is set in init */ | |
225 | }; | |
226 | ||
0e93a6ed CS |
227 | static struct kmem_cache *hv_page_cache; |
228 | ||
cc331eee BC |
229 | static DEFINE_PER_CPU(int, hv_24x7_txn_flags); |
230 | static DEFINE_PER_CPU(int, hv_24x7_txn_err); | |
88a48613 SB |
231 | |
232 | struct hv_24x7_hw { | |
233 | struct perf_event *events[255]; | |
234 | }; | |
235 | ||
cc331eee | 236 | static DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); |
88a48613 | 237 | |
145264e2 SB |
238 | /* |
239 | * request_buffer and result_buffer are not required to be 4k aligned, | |
240 | * but are not allowed to cross any 4k boundary. Aligning them to 4k is | |
241 | * the simplest way to ensure that. | |
242 | */ | |
243 | #define H24x7_DATA_BUFFER_SIZE 4096 | |
cc331eee BC |
244 | static DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); |
245 | static DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); | |
145264e2 | 246 | |
2e6553aa TJB |
247 | static unsigned int max_num_requests(int interface_version) |
248 | { | |
249 | return (H24x7_DATA_BUFFER_SIZE - sizeof(struct hv_24x7_request_buffer)) | |
250 | / H24x7_REQUEST_SIZE(interface_version); | |
251 | } | |
36c8fb2c | 252 | |
5c5cd7b5 CS |
253 | static char *event_name(struct hv_24x7_event_data *ev, int *len) |
254 | { | |
255 | *len = be16_to_cpu(ev->event_name_len) - 2; | |
256 | return (char *)ev->remainder; | |
257 | } | |
258 | ||
259 | static char *event_desc(struct hv_24x7_event_data *ev, int *len) | |
260 | { | |
76c452b4 | 261 | unsigned int nl = be16_to_cpu(ev->event_name_len); |
5c5cd7b5 | 262 | __be16 *desc_len = (__be16 *)(ev->remainder + nl - 2); |
3ca4ea71 | 263 | |
5c5cd7b5 CS |
264 | *len = be16_to_cpu(*desc_len) - 2; |
265 | return (char *)ev->remainder + nl; | |
266 | } | |
267 | ||
268 | static char *event_long_desc(struct hv_24x7_event_data *ev, int *len) | |
269 | { | |
76c452b4 | 270 | unsigned int nl = be16_to_cpu(ev->event_name_len); |
5c5cd7b5 | 271 | __be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2); |
76c452b4 | 272 | unsigned int desc_len = be16_to_cpu(*desc_len_); |
5c5cd7b5 | 273 | __be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2); |
3ca4ea71 | 274 | |
5c5cd7b5 CS |
275 | *len = be16_to_cpu(*long_desc_len) - 2; |
276 | return (char *)ev->remainder + nl + desc_len; | |
277 | } | |
278 | ||
279 | static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev, | |
280 | void *end) | |
281 | { | |
282 | void *start = ev; | |
283 | ||
284 | return (start + offsetof(struct hv_24x7_event_data, remainder)) < end; | |
285 | } | |
286 | ||
287 | /* | |
288 | * Things we don't check: | |
289 | * - padding for desc, name, and long/detailed desc is required to be '\0' | |
290 | * bytes. | |
291 | * | |
292 | * Return NULL if we pass end, | |
293 | * Otherwise return the address of the byte just following the event. | |
294 | */ | |
295 | static void *event_end(struct hv_24x7_event_data *ev, void *end) | |
296 | { | |
297 | void *start = ev; | |
298 | __be16 *dl_, *ldl_; | |
76c452b4 JW |
299 | unsigned int dl, ldl; |
300 | unsigned int nl = be16_to_cpu(ev->event_name_len); | |
5c5cd7b5 CS |
301 | |
302 | if (nl < 2) { | |
303 | pr_debug("%s: name length too short: %d", __func__, nl); | |
304 | return NULL; | |
305 | } | |
306 | ||
307 | if (start + nl > end) { | |
308 | pr_debug("%s: start=%p + nl=%u > end=%p", | |
309 | __func__, start, nl, end); | |
310 | return NULL; | |
311 | } | |
312 | ||
313 | dl_ = (__be16 *)(ev->remainder + nl - 2); | |
314 | if (!IS_ALIGNED((uintptr_t)dl_, 2)) | |
315 | pr_warn("desc len not aligned %p", dl_); | |
316 | dl = be16_to_cpu(*dl_); | |
317 | if (dl < 2) { | |
318 | pr_debug("%s: desc len too short: %d", __func__, dl); | |
319 | return NULL; | |
320 | } | |
321 | ||
322 | if (start + nl + dl > end) { | |
323 | pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p", | |
324 | __func__, start, nl, dl, start + nl + dl, end); | |
325 | return NULL; | |
326 | } | |
327 | ||
328 | ldl_ = (__be16 *)(ev->remainder + nl + dl - 2); | |
329 | if (!IS_ALIGNED((uintptr_t)ldl_, 2)) | |
330 | pr_warn("long desc len not aligned %p", ldl_); | |
331 | ldl = be16_to_cpu(*ldl_); | |
332 | if (ldl < 2) { | |
333 | pr_debug("%s: long desc len too short (ldl=%u)", | |
334 | __func__, ldl); | |
335 | return NULL; | |
336 | } | |
337 | ||
338 | if (start + nl + dl + ldl > end) { | |
339 | pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p", | |
340 | __func__, start, nl, dl, ldl, end); | |
341 | return NULL; | |
342 | } | |
343 | ||
344 | return start + nl + dl + ldl; | |
345 | } | |
346 | ||
38d81846 TJB |
347 | static long h_get_24x7_catalog_page_(unsigned long phys_4096, |
348 | unsigned long version, unsigned long index) | |
0e93a6ed | 349 | { |
78d13166 | 350 | pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)", |
3ca4ea71 SB |
351 | phys_4096, version, index); |
352 | ||
78d13166 | 353 | WARN_ON(!IS_ALIGNED(phys_4096, 4096)); |
3ca4ea71 | 354 | |
0e93a6ed | 355 | return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE, |
3ca4ea71 | 356 | phys_4096, version, index); |
0e93a6ed CS |
357 | } |
358 | ||
38d81846 | 359 | static long h_get_24x7_catalog_page(char page[], u64 version, u32 index) |
78d13166 CS |
360 | { |
361 | return h_get_24x7_catalog_page_(virt_to_phys(page), | |
362 | version, index); | |
363 | } | |
364 | ||
8f69dc70 SB |
365 | /* |
366 | * Each event we find in the catalog, will have a sysfs entry. Format the | |
367 | * data for this sysfs entry based on the event's domain. | |
368 | * | |
369 | * Events belonging to the Chip domain can only be monitored in that domain. | |
370 | * i.e the domain for these events is a fixed/known value. | |
371 | * | |
372 | * Events belonging to the Core domain can be monitored either in the physical | |
373 | * core or in one of the virtual CPU domains. So the domain value for these | |
374 | * events must be specified by the user (i.e is a required parameter). Format | |
375 | * the Core events with 'domain=?' so the perf-tool can error check required | |
376 | * parameters. | |
377 | * | |
378 | * NOTE: For the Core domain events, rather than making domain a required | |
379 | * parameter we could default it to PHYS_CORE and allow users to | |
380 | * override the domain to one of the VCPU domains. | |
381 | * | |
382 | * However, this can make the interface a little inconsistent. | |
383 | * | |
384 | * If we set domain=2 (PHYS_CHIP) and allow user to override this field | |
385 | * the user may be tempted to also modify the "offset=x" field in which | |
386 | * can lead to confusing usage. Consider the HPM_PCYC (offset=0x18) and | |
387 | * HPM_INST (offset=0x20) events. With: | |
388 | * | |
389 | * perf stat -e hv_24x7/HPM_PCYC,offset=0x20/ | |
390 | * | |
391 | * we end up monitoring HPM_INST, while the command line has HPM_PCYC. | |
392 | * | |
393 | * By not assigning a default value to the domain for the Core events, | |
394 | * we can have simple guidelines: | |
395 | * | |
396 | * - Specifying values for parameters with "=?" is required. | |
397 | * | |
398 | * - Specifying (i.e overriding) values for other parameters | |
399 | * is undefined. | |
400 | */ | |
76c452b4 | 401 | static char *event_fmt(struct hv_24x7_event_data *event, unsigned int domain) |
5c5cd7b5 CS |
402 | { |
403 | const char *sindex; | |
404 | const char *lpar; | |
8f69dc70 SB |
405 | const char *domain_str; |
406 | char buf[8]; | |
5c5cd7b5 | 407 | |
e5a5886d SB |
408 | switch (domain) { |
409 | case HV_PERF_DOMAIN_PHYS_CHIP: | |
8f69dc70 SB |
410 | snprintf(buf, sizeof(buf), "%d", domain); |
411 | domain_str = buf; | |
e5a5886d SB |
412 | lpar = "0x0"; |
413 | sindex = "chip"; | |
414 | break; | |
415 | case HV_PERF_DOMAIN_PHYS_CORE: | |
8f69dc70 | 416 | domain_str = "?"; |
5c5cd7b5 CS |
417 | lpar = "0x0"; |
418 | sindex = "core"; | |
e5a5886d SB |
419 | break; |
420 | default: | |
8f69dc70 | 421 | domain_str = "?"; |
5c5cd7b5 CS |
422 | lpar = "?"; |
423 | sindex = "vcpu"; | |
424 | } | |
425 | ||
426 | return kasprintf(GFP_KERNEL, | |
8f69dc70 SB |
427 | "domain=%s,offset=0x%x,%s=?,lpar=%s", |
428 | domain_str, | |
5c5cd7b5 CS |
429 | be16_to_cpu(event->event_counter_offs) + |
430 | be16_to_cpu(event->event_group_record_offs), | |
431 | sindex, | |
432 | lpar); | |
433 | } | |
434 | ||
435 | /* Avoid trusting fw to NUL terminate strings */ | |
436 | static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp) | |
437 | { | |
438 | return kasprintf(gfp, "%.*s", max_len, maybe_str); | |
439 | } | |
440 | ||
441 | static ssize_t device_show_string(struct device *dev, | |
442 | struct device_attribute *attr, char *buf) | |
443 | { | |
444 | struct dev_ext_attribute *d; | |
445 | ||
446 | d = container_of(attr, struct dev_ext_attribute, attr); | |
3ca4ea71 | 447 | |
5c5cd7b5 CS |
448 | return sprintf(buf, "%s\n", (char *)d->var); |
449 | } | |
450 | ||
792f73f7 KJ |
451 | static ssize_t cpumask_show(struct device *dev, |
452 | struct device_attribute *attr, char *buf) | |
453 | { | |
454 | return cpumap_print_to_pagebuf(true, buf, &hv_24x7_cpumask); | |
455 | } | |
456 | ||
60beb65d KJ |
457 | static ssize_t sockets_show(struct device *dev, |
458 | struct device_attribute *attr, char *buf) | |
459 | { | |
460 | return sprintf(buf, "%d\n", phys_sockets); | |
461 | } | |
462 | ||
463 | static ssize_t chipspersocket_show(struct device *dev, | |
464 | struct device_attribute *attr, char *buf) | |
465 | { | |
466 | return sprintf(buf, "%d\n", phys_chipspersocket); | |
467 | } | |
468 | ||
469 | static ssize_t coresperchip_show(struct device *dev, | |
470 | struct device_attribute *attr, char *buf) | |
471 | { | |
472 | return sprintf(buf, "%d\n", phys_coresperchip); | |
473 | } | |
474 | ||
5c5cd7b5 CS |
475 | static struct attribute *device_str_attr_create_(char *name, char *str) |
476 | { | |
477 | struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL); | |
478 | ||
479 | if (!attr) | |
480 | return NULL; | |
481 | ||
442053e5 SB |
482 | sysfs_attr_init(&attr->attr.attr); |
483 | ||
5c5cd7b5 CS |
484 | attr->var = str; |
485 | attr->attr.attr.name = name; | |
486 | attr->attr.attr.mode = 0444; | |
487 | attr->attr.show = device_show_string; | |
3ca4ea71 | 488 | |
5c5cd7b5 CS |
489 | return &attr->attr.attr; |
490 | } | |
491 | ||
8f69dc70 SB |
492 | /* |
493 | * Allocate and initialize strings representing event attributes. | |
494 | * | |
495 | * NOTE: The strings allocated here are never destroyed and continue to | |
496 | * exist till shutdown. This is to allow us to create as many events | |
497 | * from the catalog as possible, even if we encounter errors with some. | |
498 | * In case of changes to error paths in future, these may need to be | |
499 | * freed by the caller. | |
500 | */ | |
5c5cd7b5 CS |
501 | static struct attribute *device_str_attr_create(char *name, int name_max, |
502 | int name_nonce, | |
503 | char *str, size_t str_max) | |
504 | { | |
505 | char *n; | |
506 | char *s = memdup_to_str(str, str_max, GFP_KERNEL); | |
507 | struct attribute *a; | |
508 | ||
509 | if (!s) | |
510 | return NULL; | |
511 | ||
512 | if (!name_nonce) | |
513 | n = kasprintf(GFP_KERNEL, "%.*s", name_max, name); | |
514 | else | |
515 | n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name, | |
516 | name_nonce); | |
517 | if (!n) | |
518 | goto out_s; | |
519 | ||
520 | a = device_str_attr_create_(n, s); | |
521 | if (!a) | |
522 | goto out_n; | |
523 | ||
524 | return a; | |
525 | out_n: | |
526 | kfree(n); | |
527 | out_s: | |
528 | kfree(s); | |
529 | return NULL; | |
530 | } | |
531 | ||
76c452b4 | 532 | static struct attribute *event_to_attr(unsigned int ix, |
5c5cd7b5 | 533 | struct hv_24x7_event_data *event, |
76c452b4 | 534 | unsigned int domain, |
5c5cd7b5 CS |
535 | int nonce) |
536 | { | |
537 | int event_name_len; | |
538 | char *ev_name, *a_ev_name, *val; | |
5c5cd7b5 CS |
539 | struct attribute *attr; |
540 | ||
541 | if (!domain_is_valid(domain)) { | |
542 | pr_warn("catalog event %u has invalid domain %u\n", | |
543 | ix, domain); | |
544 | return NULL; | |
545 | } | |
546 | ||
547 | val = event_fmt(event, domain); | |
548 | if (!val) | |
549 | return NULL; | |
550 | ||
5c5cd7b5 CS |
551 | ev_name = event_name(event, &event_name_len); |
552 | if (!nonce) | |
8f69dc70 SB |
553 | a_ev_name = kasprintf(GFP_KERNEL, "%.*s", |
554 | (int)event_name_len, ev_name); | |
5c5cd7b5 | 555 | else |
8f69dc70 SB |
556 | a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d", |
557 | (int)event_name_len, ev_name, nonce); | |
5c5cd7b5 | 558 | |
5c5cd7b5 CS |
559 | if (!a_ev_name) |
560 | goto out_val; | |
561 | ||
562 | attr = device_str_attr_create_(a_ev_name, val); | |
563 | if (!attr) | |
564 | goto out_name; | |
565 | ||
566 | return attr; | |
567 | out_name: | |
568 | kfree(a_ev_name); | |
569 | out_val: | |
570 | kfree(val); | |
571 | return NULL; | |
572 | } | |
573 | ||
574 | static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event, | |
40386217 | 575 | int nonce) |
5c5cd7b5 CS |
576 | { |
577 | int nl, dl; | |
578 | char *name = event_name(event, &nl); | |
579 | char *desc = event_desc(event, &dl); | |
580 | ||
581 | /* If there isn't a description, don't create the sysfs file */ | |
582 | if (!dl) | |
583 | return NULL; | |
584 | ||
585 | return device_str_attr_create(name, nl, nonce, desc, dl); | |
586 | } | |
587 | ||
588 | static struct attribute * | |
589 | event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce) | |
590 | { | |
591 | int nl, dl; | |
592 | char *name = event_name(event, &nl); | |
593 | char *desc = event_long_desc(event, &dl); | |
594 | ||
595 | /* If there isn't a description, don't create the sysfs file */ | |
596 | if (!dl) | |
597 | return NULL; | |
598 | ||
599 | return device_str_attr_create(name, nl, nonce, desc, dl); | |
600 | } | |
601 | ||
76c452b4 JW |
602 | static int event_data_to_attrs(unsigned int ix, struct attribute **attrs, |
603 | struct hv_24x7_event_data *event, int nonce) | |
5c5cd7b5 | 604 | { |
8f69dc70 SB |
605 | *attrs = event_to_attr(ix, event, event->domain, nonce); |
606 | if (!*attrs) | |
5c5cd7b5 | 607 | return -1; |
5c5cd7b5 | 608 | |
8f69dc70 | 609 | return 0; |
5c5cd7b5 CS |
610 | } |
611 | ||
5c5cd7b5 CS |
612 | /* */ |
613 | struct event_uniq { | |
614 | struct rb_node node; | |
615 | const char *name; | |
616 | int nl; | |
76c452b4 JW |
617 | unsigned int ct; |
618 | unsigned int domain; | |
5c5cd7b5 CS |
619 | }; |
620 | ||
/*
 * Order two memory regions: regions of different size are ordered by size
 * alone (the shorter one yields 1, the longer one -1); regions of equal
 * size are ordered by memcmp().
 */
static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
{
	if (s1 == s2)
		return memcmp(d1, d2, s1);

	return s1 < s2 ? 1 : -1;
}
630 | ||
76c452b4 JW |
/*
 * Order two (name, domain) keys: by name first using memord(), then by
 * domain value. Returns a negative, zero or positive result.
 */
static int ev_uniq_ord(const void *v1, size_t s1, unsigned int d1,
		       const void *v2, size_t s2, unsigned int d2)
{
	int by_name = memord(v1, s1, v2, s2);

	if (by_name != 0)
		return by_name;

	/* Names are equal: 1, -1 or 0 depending on the domains. */
	return (d1 > d2) - (d1 < d2);
}
644 | ||
645 | static int event_uniq_add(struct rb_root *root, const char *name, int nl, | |
76c452b4 | 646 | unsigned int domain) |
5c5cd7b5 CS |
647 | { |
648 | struct rb_node **new = &(root->rb_node), *parent = NULL; | |
649 | struct event_uniq *data; | |
650 | ||
651 | /* Figure out where to put new node */ | |
652 | while (*new) { | |
653 | struct event_uniq *it; | |
654 | int result; | |
655 | ||
c197922f | 656 | it = rb_entry(*new, struct event_uniq, node); |
5c5cd7b5 CS |
657 | result = ev_uniq_ord(name, nl, domain, it->name, it->nl, |
658 | it->domain); | |
659 | ||
660 | parent = *new; | |
661 | if (result < 0) | |
662 | new = &((*new)->rb_left); | |
663 | else if (result > 0) | |
664 | new = &((*new)->rb_right); | |
665 | else { | |
666 | it->ct++; | |
667 | pr_info("found a duplicate event %.*s, ct=%u\n", nl, | |
668 | name, it->ct); | |
669 | return it->ct; | |
670 | } | |
671 | } | |
672 | ||
673 | data = kmalloc(sizeof(*data), GFP_KERNEL); | |
674 | if (!data) | |
675 | return -ENOMEM; | |
676 | ||
677 | *data = (struct event_uniq) { | |
678 | .name = name, | |
679 | .nl = nl, | |
680 | .ct = 0, | |
681 | .domain = domain, | |
682 | }; | |
683 | ||
684 | /* Add new node and rebalance tree. */ | |
685 | rb_link_node(&data->node, parent, new); | |
686 | rb_insert_color(&data->node, root); | |
687 | ||
688 | /* data->ct */ | |
689 | return 0; | |
690 | } | |
691 | ||
692 | static void event_uniq_destroy(struct rb_root *root) | |
693 | { | |
694 | /* | |
695 | * the strings we point to are in the giant block of memory filled by | |
696 | * the catalog, and are freed separately. | |
697 | */ | |
698 | struct event_uniq *pos, *n; | |
699 | ||
700 | rbtree_postorder_for_each_entry_safe(pos, n, root, node) | |
701 | kfree(pos); | |
702 | } | |
703 | ||
704 | ||
705 | /* | |
706 | * ensure the event structure's sizes are self consistent and don't cause us to | |
707 | * read outside of the event | |
708 | * | |
709 | * On success, return the event length in bytes. | |
710 | * Otherwise, return -1 (and print as appropriate). | |
711 | */ | |
712 | static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, | |
713 | size_t event_idx, | |
714 | size_t event_data_bytes, | |
715 | size_t event_entry_count, | |
716 | size_t offset, void *end) | |
717 | { | |
718 | ssize_t ev_len; | |
719 | void *ev_end, *calc_ev_end; | |
720 | ||
721 | if (offset >= event_data_bytes) | |
722 | return -1; | |
723 | ||
724 | if (event_idx >= event_entry_count) { | |
725 | pr_devel("catalog event data has %zu bytes of padding after last event\n", | |
726 | event_data_bytes - offset); | |
727 | return -1; | |
728 | } | |
729 | ||
730 | if (!event_fixed_portion_is_within(event, end)) { | |
731 | pr_warn("event %zu fixed portion is not within range\n", | |
732 | event_idx); | |
733 | return -1; | |
734 | } | |
735 | ||
736 | ev_len = be16_to_cpu(event->length); | |
737 | ||
738 | if (ev_len % 16) | |
739 | pr_info("event %zu has length %zu not divisible by 16: event=%pK\n", | |
740 | event_idx, ev_len, event); | |
741 | ||
742 | ev_end = (__u8 *)event + ev_len; | |
743 | if (ev_end > end) { | |
744 | pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n", | |
745 | event_idx, ev_len, ev_end, end, | |
746 | offset); | |
747 | return -1; | |
748 | } | |
749 | ||
750 | calc_ev_end = event_end(event, end); | |
751 | if (!calc_ev_end) { | |
752 | pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n", | |
753 | event_idx, event_data_bytes, event, end, | |
754 | offset); | |
755 | return -1; | |
756 | } | |
757 | ||
758 | if (calc_ev_end > ev_end) { | |
5b09250c | 759 | pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n", |
5c5cd7b5 CS |
760 | event_idx, event, ev_end, offset, calc_ev_end); |
761 | return -1; | |
762 | } | |
763 | ||
764 | return ev_len; | |
765 | } | |
766 | ||
e5f9d885 KJ |
/*
 * Return true in case of invalid or dummy events, i.e. those whose name
 * starts with "RESERVED".
 */
static bool ignore_event(const char *name)
{
	static const char prefix[] = "RESERVED";

	return !strncmp(name, prefix, sizeof(prefix) - 1);
}
774 | ||
5c5cd7b5 CS |
775 | #define MAX_4K (SIZE_MAX / 4096) |
776 | ||
7debc970 | 777 | static int create_events_from_catalog(struct attribute ***events_, |
40386217 SB |
778 | struct attribute ***event_descs_, |
779 | struct attribute ***event_long_descs_) | |
5c5cd7b5 | 780 | { |
38d81846 | 781 | long hret; |
5c5cd7b5 CS |
782 | size_t catalog_len, catalog_page_len, event_entry_count, |
783 | event_data_len, event_data_offs, | |
784 | event_data_bytes, junk_events, event_idx, event_attr_ct, i, | |
785 | attr_max, event_idx_last, desc_ct, long_desc_ct; | |
786 | ssize_t ct, ev_len; | |
12bf85a7 | 787 | uint64_t catalog_version_num; |
5c5cd7b5 CS |
788 | struct attribute **events, **event_descs, **event_long_descs; |
789 | struct hv_24x7_catalog_page_0 *page_0 = | |
790 | kmem_cache_alloc(hv_page_cache, GFP_KERNEL); | |
791 | void *page = page_0; | |
792 | void *event_data, *end; | |
793 | struct hv_24x7_event_data *event; | |
794 | struct rb_root ev_uniq = RB_ROOT; | |
7debc970 | 795 | int ret = 0; |
5c5cd7b5 | 796 | |
7debc970 LZ |
797 | if (!page) { |
798 | ret = -ENOMEM; | |
5c5cd7b5 | 799 | goto e_out; |
7debc970 | 800 | } |
5c5cd7b5 CS |
801 | |
802 | hret = h_get_24x7_catalog_page(page, 0, 0); | |
7debc970 LZ |
803 | if (hret) { |
804 | ret = -EIO; | |
5c5cd7b5 | 805 | goto e_free; |
7debc970 | 806 | } |
5c5cd7b5 CS |
807 | |
808 | catalog_version_num = be64_to_cpu(page_0->version); | |
809 | catalog_page_len = be32_to_cpu(page_0->length); | |
810 | ||
811 | if (MAX_4K < catalog_page_len) { | |
812 | pr_err("invalid page count: %zu\n", catalog_page_len); | |
7debc970 | 813 | ret = -EIO; |
5c5cd7b5 CS |
814 | goto e_free; |
815 | } | |
816 | ||
817 | catalog_len = catalog_page_len * 4096; | |
818 | ||
819 | event_entry_count = be16_to_cpu(page_0->event_entry_count); | |
820 | event_data_offs = be16_to_cpu(page_0->event_data_offs); | |
821 | event_data_len = be16_to_cpu(page_0->event_data_len); | |
822 | ||
12bf85a7 TJB |
823 | pr_devel("cv %llu cl %zu eec %zu edo %zu edl %zu\n", |
824 | catalog_version_num, catalog_len, | |
5c5cd7b5 CS |
825 | event_entry_count, event_data_offs, event_data_len); |
826 | ||
827 | if ((MAX_4K < event_data_len) | |
828 | || (MAX_4K < event_data_offs) | |
829 | || (MAX_4K - event_data_offs < event_data_len)) { | |
830 | pr_err("invalid event data offs %zu and/or len %zu\n", | |
831 | event_data_offs, event_data_len); | |
7debc970 | 832 | ret = -EIO; |
5c5cd7b5 CS |
833 | goto e_free; |
834 | } | |
835 | ||
836 | if ((event_data_offs + event_data_len) > catalog_page_len) { | |
837 | pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n", | |
838 | event_data_offs, | |
839 | event_data_offs + event_data_len, | |
840 | catalog_page_len); | |
7debc970 | 841 | ret = -EIO; |
5c5cd7b5 CS |
842 | goto e_free; |
843 | } | |
844 | ||
8f69dc70 SB |
845 | if (SIZE_MAX - 1 < event_entry_count) { |
846 | pr_err("event_entry_count %zu is invalid\n", event_entry_count); | |
7debc970 | 847 | ret = -EIO; |
5c5cd7b5 CS |
848 | goto e_free; |
849 | } | |
850 | ||
851 | event_data_bytes = event_data_len * 4096; | |
852 | ||
853 | /* | |
854 | * event data can span several pages, events can cross between these | |
855 | * pages. Use vmalloc to make this easier. | |
856 | */ | |
857 | event_data = vmalloc(event_data_bytes); | |
858 | if (!event_data) { | |
859 | pr_err("could not allocate event data\n"); | |
7debc970 | 860 | ret = -ENOMEM; |
5c5cd7b5 CS |
861 | goto e_free; |
862 | } | |
863 | ||
864 | end = event_data + event_data_bytes; | |
865 | ||
866 | /* | |
867 | * using vmalloc_to_phys() like this only works if PAGE_SIZE is | |
868 | * divisible by 4096 | |
869 | */ | |
870 | BUILD_BUG_ON(PAGE_SIZE % 4096); | |
871 | ||
872 | for (i = 0; i < event_data_len; i++) { | |
873 | hret = h_get_24x7_catalog_page_( | |
874 | vmalloc_to_phys(event_data + i * 4096), | |
875 | catalog_version_num, | |
876 | i + event_data_offs); | |
877 | if (hret) { | |
12bf85a7 TJB |
878 | pr_err("Failed to get event data in page %zu: rc=%ld\n", |
879 | i + event_data_offs, hret); | |
7debc970 | 880 | ret = -EIO; |
5c5cd7b5 CS |
881 | goto e_event_data; |
882 | } | |
883 | } | |
884 | ||
885 | /* | |
886 | * scan the catalog to determine the number of attributes we need, and | |
887 | * verify it at the same time. | |
888 | */ | |
889 | for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0; | |
890 | ; | |
891 | event_idx++, event = (void *)event + ev_len) { | |
892 | size_t offset = (void *)event - (void *)event_data; | |
893 | char *name; | |
894 | int nl; | |
895 | ||
896 | ev_len = catalog_event_len_validate(event, event_idx, | |
897 | event_data_bytes, | |
898 | event_entry_count, | |
899 | offset, end); | |
900 | if (ev_len < 0) | |
901 | break; | |
902 | ||
903 | name = event_name(event, &nl); | |
904 | ||
e5f9d885 KJ |
905 | if (ignore_event(name)) { |
906 | junk_events++; | |
907 | continue; | |
908 | } | |
5c5cd7b5 CS |
909 | if (event->event_group_record_len == 0) { |
910 | pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n", | |
911 | event_idx, nl, name); | |
912 | junk_events++; | |
913 | continue; | |
914 | } | |
915 | ||
916 | if (!catalog_entry_domain_is_valid(event->domain)) { | |
917 | pr_info("event %zu (%.*s) has invalid domain %d\n", | |
918 | event_idx, nl, name, event->domain); | |
919 | junk_events++; | |
920 | continue; | |
921 | } | |
922 | ||
8f69dc70 | 923 | attr_max++; |
5c5cd7b5 CS |
924 | } |
925 | ||
926 | event_idx_last = event_idx; | |
927 | if (event_idx_last != event_entry_count) | |
928 | pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n", | |
929 | event_idx_last, event_entry_count, junk_events); | |
930 | ||
931 | events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL); | |
7debc970 LZ |
932 | if (!events) { |
933 | ret = -ENOMEM; | |
5c5cd7b5 | 934 | goto e_event_data; |
7debc970 | 935 | } |
5c5cd7b5 CS |
936 | |
937 | event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs), | |
938 | GFP_KERNEL); | |
7debc970 LZ |
939 | if (!event_descs) { |
940 | ret = -ENOMEM; | |
5c5cd7b5 | 941 | goto e_event_attrs; |
7debc970 | 942 | } |
5c5cd7b5 CS |
943 | |
944 | event_long_descs = kmalloc_array(event_idx + 1, | |
945 | sizeof(*event_long_descs), GFP_KERNEL); | |
7debc970 LZ |
946 | if (!event_long_descs) { |
947 | ret = -ENOMEM; | |
5c5cd7b5 | 948 | goto e_event_descs; |
7debc970 | 949 | } |
5c5cd7b5 CS |
950 | |
951 | /* Iterate over the catalog filling in the attribute vector */ | |
952 | for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0, | |
953 | event = event_data, event_idx = 0; | |
954 | event_idx < event_idx_last; | |
955 | event_idx++, ev_len = be16_to_cpu(event->length), | |
956 | event = (void *)event + ev_len) { | |
957 | char *name; | |
958 | int nl; | |
959 | int nonce; | |
960 | /* | |
961 | * these are the only "bad" events that are intermixed and that | |
962 | * we can ignore without issue. make sure to skip them here | |
963 | */ | |
964 | if (event->event_group_record_len == 0) | |
965 | continue; | |
966 | if (!catalog_entry_domain_is_valid(event->domain)) | |
967 | continue; | |
968 | ||
969 | name = event_name(event, &nl); | |
e5f9d885 KJ |
970 | if (ignore_event(name)) |
971 | continue; | |
972 | ||
5c5cd7b5 CS |
973 | nonce = event_uniq_add(&ev_uniq, name, nl, event->domain); |
974 | ct = event_data_to_attrs(event_idx, events + event_attr_ct, | |
975 | event, nonce); | |
8f69dc70 | 976 | if (ct < 0) { |
5c5cd7b5 CS |
977 | pr_warn("event %zu (%.*s) creation failure, skipping\n", |
978 | event_idx, nl, name); | |
979 | junk_events++; | |
980 | } else { | |
8f69dc70 | 981 | event_attr_ct++; |
5c5cd7b5 CS |
982 | event_descs[desc_ct] = event_to_desc_attr(event, nonce); |
983 | if (event_descs[desc_ct]) | |
984 | desc_ct++; | |
985 | event_long_descs[long_desc_ct] = | |
986 | event_to_long_desc_attr(event, nonce); | |
987 | if (event_long_descs[long_desc_ct]) | |
988 | long_desc_ct++; | |
989 | } | |
990 | } | |
991 | ||
992 | pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n", | |
993 | event_idx, event_attr_ct, junk_events, desc_ct); | |
994 | ||
995 | events[event_attr_ct] = NULL; | |
996 | event_descs[desc_ct] = NULL; | |
997 | event_long_descs[long_desc_ct] = NULL; | |
998 | ||
999 | event_uniq_destroy(&ev_uniq); | |
1000 | vfree(event_data); | |
1001 | kmem_cache_free(hv_page_cache, page); | |
1002 | ||
1003 | *events_ = events; | |
1004 | *event_descs_ = event_descs; | |
1005 | *event_long_descs_ = event_long_descs; | |
7debc970 | 1006 | return 0; |
5c5cd7b5 CS |
1007 | |
1008 | e_event_descs: | |
1009 | kfree(event_descs); | |
1010 | e_event_attrs: | |
1011 | kfree(events); | |
1012 | e_event_data: | |
1013 | vfree(event_data); | |
1014 | e_free: | |
1015 | kmem_cache_free(hv_page_cache, page); | |
1016 | e_out: | |
1017 | *events_ = NULL; | |
1018 | *event_descs_ = NULL; | |
1019 | *event_long_descs_ = NULL; | |
7debc970 | 1020 | return ret; |
5c5cd7b5 CS |
1021 | } |
1022 | ||
0e93a6ed CS |
1023 | static ssize_t catalog_read(struct file *filp, struct kobject *kobj, |
1024 | struct bin_attribute *bin_attr, char *buf, | |
1025 | loff_t offset, size_t count) | |
1026 | { | |
38d81846 | 1027 | long hret; |
0e93a6ed | 1028 | ssize_t ret = 0; |
56f12bee | 1029 | size_t catalog_len = 0, catalog_page_len = 0; |
0e93a6ed | 1030 | loff_t page_offset = 0; |
56f12bee | 1031 | loff_t offset_in_page; |
1032 | size_t copy_len; | |
bbad3e50 | 1033 | uint64_t catalog_version_num = 0; |
0e93a6ed CS |
1034 | void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); |
1035 | struct hv_24x7_catalog_page_0 *page_0 = page; | |
3ca4ea71 | 1036 | |
0e93a6ed CS |
1037 | if (!page) |
1038 | return -ENOMEM; | |
1039 | ||
1040 | hret = h_get_24x7_catalog_page(page, 0, 0); | |
1041 | if (hret) { | |
1042 | ret = -EIO; | |
1043 | goto e_free; | |
1044 | } | |
1045 | ||
bbad3e50 | 1046 | catalog_version_num = be64_to_cpu(page_0->version); |
0e93a6ed CS |
1047 | catalog_page_len = be32_to_cpu(page_0->length); |
1048 | catalog_len = catalog_page_len * 4096; | |
1049 | ||
1050 | page_offset = offset / 4096; | |
56f12bee | 1051 | offset_in_page = offset % 4096; |
0e93a6ed CS |
1052 | |
1053 | if (page_offset >= catalog_page_len) | |
1054 | goto e_free; | |
1055 | ||
1056 | if (page_offset != 0) { | |
1057 | hret = h_get_24x7_catalog_page(page, catalog_version_num, | |
1058 | page_offset); | |
1059 | if (hret) { | |
1060 | ret = -EIO; | |
1061 | goto e_free; | |
1062 | } | |
1063 | } | |
1064 | ||
56f12bee | 1065 | copy_len = 4096 - offset_in_page; |
1066 | if (copy_len > count) | |
1067 | copy_len = count; | |
1068 | ||
1069 | memcpy(buf, page+offset_in_page, copy_len); | |
1070 | ret = copy_len; | |
1071 | ||
0e93a6ed CS |
1072 | e_free: |
1073 | if (hret) | |
bbad3e50 CS |
1074 | pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:" |
1075 | " rc=%ld\n", | |
1076 | catalog_version_num, page_offset, hret); | |
d6589722 | 1077 | kmem_cache_free(hv_page_cache, page); |
0e93a6ed | 1078 | |
56f12bee | 1079 | pr_devel("catalog_read: offset=%lld(%lld) count=%zu " |
1080 | "catalog_len=%zu(%zu) => %zd\n", offset, page_offset, | |
1081 | count, catalog_len, catalog_page_len, ret); | |
0e93a6ed CS |
1082 | |
1083 | return ret; | |
1084 | } | |
1085 | ||
d34171e8 SB |
1086 | static ssize_t domains_show(struct device *dev, struct device_attribute *attr, |
1087 | char *page) | |
1088 | { | |
1089 | int d, n, count = 0; | |
1090 | const char *str; | |
1091 | ||
1092 | for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) { | |
1093 | str = domain_name(d); | |
1094 | if (!str) | |
1095 | continue; | |
1096 | ||
1097 | n = sprintf(page, "%d: %s\n", d, str); | |
1098 | if (n < 0) | |
1099 | break; | |
1100 | ||
1101 | count += n; | |
1102 | page += n; | |
1103 | } | |
1104 | return count; | |
1105 | } | |
1106 | ||
0e93a6ed CS |
1107 | #define PAGE_0_ATTR(_name, _fmt, _expr) \ |
1108 | static ssize_t _name##_show(struct device *dev, \ | |
1109 | struct device_attribute *dev_attr, \ | |
1110 | char *buf) \ | |
1111 | { \ | |
38d81846 | 1112 | long hret; \ |
0e93a6ed CS |
1113 | ssize_t ret = 0; \ |
1114 | void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); \ | |
1115 | struct hv_24x7_catalog_page_0 *page_0 = page; \ | |
1116 | if (!page) \ | |
1117 | return -ENOMEM; \ | |
1118 | hret = h_get_24x7_catalog_page(page, 0, 0); \ | |
1119 | if (hret) { \ | |
1120 | ret = -EIO; \ | |
1121 | goto e_free; \ | |
1122 | } \ | |
1123 | ret = sprintf(buf, _fmt, _expr); \ | |
1124 | e_free: \ | |
ec2aef5a | 1125 | kmem_cache_free(hv_page_cache, page); \ |
0e93a6ed CS |
1126 | return ret; \ |
1127 | } \ | |
1128 | static DEVICE_ATTR_RO(_name) | |
1129 | ||
1130 | PAGE_0_ATTR(catalog_version, "%lld\n", | |
bbad3e50 | 1131 | (unsigned long long)be64_to_cpu(page_0->version)); |
0e93a6ed CS |
1132 | PAGE_0_ATTR(catalog_len, "%lld\n", |
1133 | (unsigned long long)be32_to_cpu(page_0->length) * 4096); | |
/*
 * sysfs attribute objects for this PMU.  The *_show routines referenced by
 * these DEVICE_ATTR_RO() declarations must exist under the matching names.
 */
1134 | static BIN_ATTR_RO(catalog, 0/* real length varies */); | |
d34171e8 | 1135 | static DEVICE_ATTR_RO(domains); |
60beb65d KJ |
1136 | static DEVICE_ATTR_RO(sockets); |
1137 | static DEVICE_ATTR_RO(chipspersocket); | |
1138 | static DEVICE_ATTR_RO(coresperchip); | |
792f73f7 | 1139 | static DEVICE_ATTR_RO(cpumask); |
0e93a6ed CS |
1140 | |
/* Binary attributes of the "interface" group: only the raw catalog. */
1141 | static struct bin_attribute *if_bin_attrs[] = { | |
1142 | &bin_attr_catalog, | |
1143 | NULL, | |
1144 | }; | |
1145 | ||
/* The cpumask attribute is placed in its own group, which sets no .name. */
64ef8f2c KJ |
1146 | static struct attribute *cpumask_attrs[] = { |
1147 | &dev_attr_cpumask.attr, | |
1148 | NULL, | |
1149 | }; | |
1150 | ||
6b3a3e12 | 1151 | static const struct attribute_group cpumask_attr_group = { |
64ef8f2c KJ |
1152 | .attrs = cpumask_attrs, |
1153 | }; | |
1154 | ||
/* Text attributes of the "interface" group (NULL-terminated). */
0e93a6ed CS |
1155 | static struct attribute *if_attrs[] = { |
1156 | &dev_attr_catalog_len.attr, | |
1157 | &dev_attr_catalog_version.attr, | |
d34171e8 | 1158 | &dev_attr_domains.attr, |
60beb65d KJ |
1159 | &dev_attr_sockets.attr, |
1160 | &dev_attr_chipspersocket.attr, | |
1161 | &dev_attr_coresperchip.attr, | |
0e93a6ed CS |
1162 | NULL, |
1163 | }; | |
1164 | ||
/* Named group combining the text and binary interface attributes. */
6b3a3e12 | 1165 | static const struct attribute_group if_group = { |
0e93a6ed CS |
1166 | .name = "interface", |
1167 | .bin_attrs = if_bin_attrs, | |
1168 | .attrs = if_attrs, | |
1169 | }; | |
1170 | ||
/*
 * NULL-terminated list of every attribute group of the PMU; it is what
 * h_24x7_pmu.attr_groups points at.
 */
1171 | static const struct attribute_group *attr_groups[] = { | |
1172 | &format_group, | |
5c5cd7b5 CS |
1173 | &event_group, |
1174 | &event_desc_group, | |
1175 | &event_long_desc_group, | |
0e93a6ed | 1176 | &if_group, |
64ef8f2c | 1177 | &cpumask_attr_group, |
0e93a6ed CS |
1178 | NULL, |
1179 | }; | |
1180 | ||
aeab199d SB |
1181 | /* |
1182 | * Start the process for a new H_GET_24x7_DATA hcall. | |
1183 | */ | |
1184 | static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer, | |
40386217 | 1185 | struct hv_24x7_data_result_buffer *result_buffer) |
aeab199d SB |
1186 | { |
1187 | ||
ebd4a5a3 TJB |
1188 | memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE); |
1189 | memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE); | |
aeab199d | 1190 | |
2e6553aa | 1191 | request_buffer->interface_version = interface_version; |
aeab199d SB |
1192 | /* memset above set request_buffer->num_requests to 0 */ |
1193 | } | |
f34b6c72 | 1194 | |
aeab199d SB |
1195 | /* |
1196 | * Commit (i.e perform) the H_GET_24x7_DATA hcall using the data collected | |
1197 | * by 'init_24x7_request()' and 'add_event_to_24x7_request()'. | |
1198 | */ | |
1199 | static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer, | |
40386217 | 1200 | struct hv_24x7_data_result_buffer *result_buffer) |
0e93a6ed | 1201 | { |
38d81846 | 1202 | long ret; |
0e93a6ed CS |
1203 | |
1204 | /* | |
aeab199d SB |
1205 | * NOTE: Due to variable number of array elements in request and |
1206 | * result buffer(s), sizeof() is not reliable. Use the actual | |
1207 | * allocated buffer size, H24x7_DATA_BUFFER_SIZE. | |
0e93a6ed | 1208 | */ |
aeab199d SB |
1209 | ret = plpar_hcall_norets(H_GET_24X7_DATA, |
1210 | virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE, | |
1211 | virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE); | |
1212 | ||
62714a14 TJB |
1213 | if (ret) { |
1214 | struct hv_24x7_request *req; | |
1215 | ||
2e6553aa | 1216 | req = request_buffer->requests; |
62714a14 TJB |
1217 | pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n", |
1218 | req->performance_domain, req->data_offset, | |
1219 | req->starting_ix, req->starting_lpar_ix, | |
1220 | ret, ret, result_buffer->detailed_rc, | |
1221 | result_buffer->failing_request_ix); | |
38d81846 | 1222 | return -EIO; |
62714a14 | 1223 | } |
aeab199d | 1224 | |
38d81846 | 1225 | return 0; |
aeab199d SB |
1226 | } |
1227 | ||
e3ee15dc SB |
1228 | /* |
1229 | * Add the given @event to the next slot in the 24x7 request_buffer. | |
1230 | * | |
1231 | * Note that H_GET_24X7_DATA hcall allows reading several counters' | |
1232 | * values in a single HCALL. We expect the caller to add events to the | |
1233 | * request buffer one by one, make the HCALL and process the results. | |
1234 | */ | |
1235 | static int add_event_to_24x7_request(struct perf_event *event, | |
1236 | struct hv_24x7_request_buffer *request_buffer) | |
0e93a6ed | 1237 | { |
80798764 | 1238 | u16 idx; |
e3ee15dc | 1239 | int i; |
2e6553aa | 1240 | size_t req_size; |
e3ee15dc SB |
1241 | struct hv_24x7_request *req; |
1242 | ||
2e6553aa TJB |
1243 | if (request_buffer->num_requests >= |
1244 | max_num_requests(request_buffer->interface_version)) { | |
e3ee15dc SB |
1245 | pr_devel("Too many requests for 24x7 HCALL %d\n", |
1246 | request_buffer->num_requests); | |
1247 | return -EINVAL; | |
1248 | } | |
1249 | ||
e5a5886d SB |
1250 | switch (event_get_domain(event)) { |
1251 | case HV_PERF_DOMAIN_PHYS_CHIP: | |
1252 | idx = event_get_chip(event); | |
1253 | break; | |
1254 | case HV_PERF_DOMAIN_PHYS_CORE: | |
e3ee15dc | 1255 | idx = event_get_core(event); |
e5a5886d SB |
1256 | break; |
1257 | default: | |
e3ee15dc | 1258 | idx = event_get_vcpu(event); |
e5a5886d | 1259 | } |
e3ee15dc | 1260 | |
2e6553aa TJB |
1261 | req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version); |
1262 | ||
e3ee15dc | 1263 | i = request_buffer->num_requests++; |
2e6553aa | 1264 | req = (void *) request_buffer->requests + i * req_size; |
e3ee15dc SB |
1265 | |
1266 | req->performance_domain = event_get_domain(event); | |
1267 | req->data_size = cpu_to_be16(8); | |
1268 | req->data_offset = cpu_to_be32(event_get_offset(event)); | |
ebd4a5a3 | 1269 | req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event)); |
e3ee15dc SB |
1270 | req->max_num_lpars = cpu_to_be16(1); |
1271 | req->starting_ix = cpu_to_be16(idx); | |
1272 | req->max_ix = cpu_to_be16(1); | |
1273 | ||
bfaa7834 TJB |
1274 | if (request_buffer->interface_version > 1) { |
1275 | if (domain_needs_aggregation(req->performance_domain)) | |
1276 | req->max_num_thread_groups = -1; | |
1277 | else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) { | |
1278 | req->starting_thread_group_ix = idx % 2; | |
1279 | req->max_num_thread_groups = 1; | |
1280 | } | |
2e6553aa TJB |
1281 | } |
1282 | ||
1283 | return 0; | |
1284 | } | |
1285 | ||
1286 | /** | |
bfaa7834 TJB |
1287 | * get_count_from_result - get event count from all result elements in result |
1288 | * | |
1289 | * If the event corresponding to this result needs aggregation of the result | |
1290 | * element values, then this function does that. | |
2e6553aa TJB |
1291 | * |
1292 | * @event: Event associated with @res. | |
1293 | * @resb: Result buffer containing @res. | |
1294 | * @res: Result to work on. | |
1295 | * @countp: Output variable containing the event count. | |
1296 | * @next: Optional output variable pointing to the next result in @resb. | |
1297 | */ | |
1298 | static int get_count_from_result(struct perf_event *event, | |
1299 | struct hv_24x7_data_result_buffer *resb, | |
1300 | struct hv_24x7_result *res, u64 *countp, | |
1301 | struct hv_24x7_result **next) | |
1302 | { | |
1303 | u16 num_elements = be16_to_cpu(res->num_elements_returned); | |
1304 | u16 data_size = be16_to_cpu(res->result_element_data_size); | |
1305 | unsigned int data_offset; | |
1306 | void *element_data; | |
bfaa7834 TJB |
1307 | int i; |
1308 | u64 count; | |
2e6553aa TJB |
1309 | |
1310 | /* | |
1311 | * We can bail out early if the result is empty. | |
1312 | */ | |
1313 | if (!num_elements) { | |
1314 | pr_debug("Result of request %hhu is empty, nothing to do\n", | |
1315 | res->result_ix); | |
1316 | ||
1317 | if (next) | |
1318 | *next = (struct hv_24x7_result *) res->elements; | |
1319 | ||
1320 | return -ENODATA; | |
1321 | } | |
1322 | ||
1323 | /* | |
1324 | * Since we always specify 1 as the maximum for the smallest resource | |
1325 | * we're requesting, there should to be only one element per result. | |
bfaa7834 | 1326 | * Except when an event needs aggregation, in which case there are more. |
2e6553aa | 1327 | */ |
bfaa7834 TJB |
1328 | if (num_elements != 1 && |
1329 | !domain_needs_aggregation(event_get_domain(event))) { | |
2e6553aa TJB |
1330 | pr_err("Error: result of request %hhu has %hu elements\n", |
1331 | res->result_ix, num_elements); | |
1332 | ||
1333 | return -EIO; | |
1334 | } | |
1335 | ||
1336 | if (data_size != sizeof(u64)) { | |
1337 | pr_debug("Error: result of request %hhu has data of %hu bytes\n", | |
1338 | res->result_ix, data_size); | |
1339 | ||
1340 | return -ENOTSUPP; | |
1341 | } | |
1342 | ||
1343 | if (resb->interface_version == 1) | |
1344 | data_offset = offsetof(struct hv_24x7_result_element_v1, | |
1345 | element_data); | |
1346 | else | |
1347 | data_offset = offsetof(struct hv_24x7_result_element_v2, | |
1348 | element_data); | |
1349 | ||
bfaa7834 TJB |
1350 | /* Go through the result elements in the result. */ |
1351 | for (i = count = 0, element_data = res->elements + data_offset; | |
1352 | i < num_elements; | |
1353 | i++, element_data += data_size + data_offset) | |
1354 | count += be64_to_cpu(*((u64 *) element_data)); | |
2e6553aa | 1355 | |
bfaa7834 | 1356 | *countp = count; |
2e6553aa | 1357 | |
bfaa7834 | 1358 | /* The next result is after the last result element. */ |
2e6553aa | 1359 | if (next) |
bfaa7834 | 1360 | *next = element_data - data_offset; |
2e6553aa | 1361 | |
e3ee15dc SB |
1362 | return 0; |
1363 | } | |
1364 | ||
38d81846 | 1365 | static int single_24x7_request(struct perf_event *event, u64 *count) |
e3ee15dc | 1366 | { |
38d81846 | 1367 | int ret; |
145264e2 SB |
1368 | struct hv_24x7_request_buffer *request_buffer; |
1369 | struct hv_24x7_data_result_buffer *result_buffer; | |
48bee8a6 CS |
1370 | |
1371 | BUILD_BUG_ON(sizeof(*request_buffer) > 4096); | |
1372 | BUILD_BUG_ON(sizeof(*result_buffer) > 4096); | |
1373 | ||
f34b6c72 | 1374 | request_buffer = (void *)get_cpu_var(hv_24x7_reqb); |
1375 | result_buffer = (void *)get_cpu_var(hv_24x7_resb); | |
48bee8a6 | 1376 | |
aeab199d | 1377 | init_24x7_request(request_buffer, result_buffer); |
0e93a6ed | 1378 | |
e3ee15dc SB |
1379 | ret = add_event_to_24x7_request(event, request_buffer); |
1380 | if (ret) | |
b816ce67 | 1381 | goto out; |
0e93a6ed | 1382 | |
aeab199d | 1383 | ret = make_24x7_request(request_buffer, result_buffer); |
62714a14 | 1384 | if (ret) |
f34b6c72 | 1385 | goto out; |
0e93a6ed | 1386 | |
aeab199d | 1387 | /* process result from hcall */ |
2e6553aa TJB |
1388 | ret = get_count_from_result(event, result_buffer, |
1389 | result_buffer->results, count, NULL); | |
48bee8a6 | 1390 | |
48bee8a6 | 1391 | out: |
b816ce67 SB |
1392 | put_cpu_var(hv_24x7_reqb); |
1393 | put_cpu_var(hv_24x7_resb); | |
0e93a6ed CS |
1394 | return ret; |
1395 | } | |
1396 | ||
0e93a6ed CS |
1397 | |
1398 | static int h_24x7_event_init(struct perf_event *event) | |
1399 | { | |
1400 | struct hv_perf_caps caps; | |
76c452b4 | 1401 | unsigned int domain; |
0e93a6ed CS |
1402 | unsigned long hret; |
1403 | u64 ct; | |
1404 | ||
1405 | /* Not our event */ | |
1406 | if (event->attr.type != event->pmu->type) | |
1407 | return -ENOENT; | |
1408 | ||
1409 | /* Unused areas must be 0 */ | |
1410 | if (event_get_reserved1(event) || | |
1411 | event_get_reserved2(event) || | |
1412 | event_get_reserved3(event)) { | |
1413 | pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n", | |
1414 | event->attr.config, | |
1415 | event_get_reserved1(event), | |
1416 | event->attr.config1, | |
1417 | event_get_reserved2(event), | |
1418 | event->attr.config2, | |
1419 | event_get_reserved3(event)); | |
1420 | return -EINVAL; | |
1421 | } | |
1422 | ||
0e93a6ed CS |
1423 | /* no branch sampling */ |
1424 | if (has_branch_stack(event)) | |
1425 | return -EOPNOTSUPP; | |
1426 | ||
1427 | /* offset must be 8 byte aligned */ | |
1428 | if (event_get_offset(event) % 8) { | |
1429 | pr_devel("bad alignment\n"); | |
1430 | return -EINVAL; | |
1431 | } | |
1432 | ||
0e93a6ed | 1433 | domain = event_get_domain(event); |
ebd4a5a3 | 1434 | if (domain >= HV_PERF_DOMAIN_MAX) { |
0e93a6ed CS |
1435 | pr_devel("invalid domain %d\n", domain); |
1436 | return -EINVAL; | |
1437 | } | |
1438 | ||
1439 | hret = hv_perf_caps_get(&caps); | |
1440 | if (hret) { | |
1441 | pr_devel("could not get capabilities: rc=%ld\n", hret); | |
1442 | return -EIO; | |
1443 | } | |
1444 | ||
5c5cd7b5 | 1445 | /* Physical domains & other lpars require extra capabilities */ |
0e93a6ed CS |
1446 | if (!caps.collect_privileged && (is_physical_domain(domain) || |
1447 | (event_get_lpar(event) != event_get_lpar_max()))) { | |
f42cf8d6 | 1448 | pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n", |
0e93a6ed CS |
1449 | is_physical_domain(domain), |
1450 | event_get_lpar(event)); | |
1451 | return -EACCES; | |
1452 | } | |
1453 | ||
2b206ee6 | 1454 | /* Get the initial value of the counter for this event */ |
80798764 | 1455 | if (single_24x7_request(event, &ct)) { |
0e93a6ed CS |
1456 | pr_devel("test hcall failed\n"); |
1457 | return -EIO; | |
1458 | } | |
2b206ee6 | 1459 | (void)local64_xchg(&event->hw.prev_count, ct); |
0e93a6ed CS |
1460 | |
1461 | return 0; | |
1462 | } | |
1463 | ||
1464 | static u64 h_24x7_get_value(struct perf_event *event) | |
1465 | { | |
0e93a6ed | 1466 | u64 ct; |
38d81846 TJB |
1467 | |
1468 | if (single_24x7_request(event, &ct)) | |
0e93a6ed CS |
1469 | /* We checked this in event init, shouldn't fail here... */ |
1470 | return 0; | |
1471 | ||
1472 | return ct; | |
1473 | } | |
1474 | ||
529ce8c9 | 1475 | static void update_event_count(struct perf_event *event, u64 now) |
0e93a6ed CS |
1476 | { |
1477 | s64 prev; | |
3ca4ea71 | 1478 | |
0e93a6ed CS |
1479 | prev = local64_xchg(&event->hw.prev_count, now); |
1480 | local64_add(now - prev, &event->count); | |
1481 | } | |
1482 | ||
529ce8c9 SB |
1483 | static void h_24x7_event_read(struct perf_event *event) |
1484 | { | |
1485 | u64 now; | |
88a48613 SB |
1486 | struct hv_24x7_request_buffer *request_buffer; |
1487 | struct hv_24x7_hw *h24x7hw; | |
1488 | int txn_flags; | |
1489 | ||
1490 | txn_flags = __this_cpu_read(hv_24x7_txn_flags); | |
1491 | ||
1492 | /* | |
1493 | * If in a READ transaction, add this counter to the list of | |
1494 | * counters to read during the next HCALL (i.e commit_txn()). | |
1495 | * If not in a READ transaction, go ahead and make the HCALL | |
1496 | * to read this counter by itself. | |
1497 | */ | |
1498 | ||
1499 | if (txn_flags & PERF_PMU_TXN_READ) { | |
1500 | int i; | |
1501 | int ret; | |
529ce8c9 | 1502 | |
88a48613 SB |
1503 | if (__this_cpu_read(hv_24x7_txn_err)) |
1504 | return; | |
1505 | ||
1506 | request_buffer = (void *)get_cpu_var(hv_24x7_reqb); | |
1507 | ||
1508 | ret = add_event_to_24x7_request(event, request_buffer); | |
1509 | if (ret) { | |
1510 | __this_cpu_write(hv_24x7_txn_err, ret); | |
1511 | } else { | |
1512 | /* | |
027dfac6 | 1513 | * Associate the event with the HCALL request index, |
88a48613 SB |
1514 | * so ->commit_txn() can quickly find/update count. |
1515 | */ | |
1516 | i = request_buffer->num_requests - 1; | |
1517 | ||
1518 | h24x7hw = &get_cpu_var(hv_24x7_hw); | |
1519 | h24x7hw->events[i] = event; | |
1520 | put_cpu_var(h24x7hw); | |
1521 | } | |
1522 | ||
1523 | put_cpu_var(hv_24x7_reqb); | |
1524 | } else { | |
1525 | now = h_24x7_get_value(event); | |
1526 | update_event_count(event, now); | |
1527 | } | |
529ce8c9 SB |
1528 | } |
1529 | ||
0e93a6ed CS |
1530 | static void h_24x7_event_start(struct perf_event *event, int flags) |
1531 | { | |
1532 | if (flags & PERF_EF_RELOAD) | |
1533 | local64_set(&event->hw.prev_count, h_24x7_get_value(event)); | |
1534 | } | |
1535 | ||
/*
 * perf ->stop() callback (also installed as ->del()).  Stopping only takes
 * a final reading; @flags is not consulted.
 */
static void h_24x7_event_stop(struct perf_event *event, int flags)
{
	h_24x7_event_read(event);
}
1540 | ||
1541 | static int h_24x7_event_add(struct perf_event *event, int flags) | |
1542 | { | |
1543 | if (flags & PERF_EF_START) | |
1544 | h_24x7_event_start(event, flags); | |
1545 | ||
1546 | return 0; | |
1547 | } | |
1548 | ||
88a48613 SB |
1549 | /* |
1550 | * 24x7 counters only support READ transactions. They are | |
1551 | * always counting and dont need/support ADD transactions. | |
1552 | * Cache the flags, but otherwise ignore transactions that | |
1553 | * are not PERF_PMU_TXN_READ. | |
1554 | */ | |
1555 | static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags) | |
1556 | { | |
1557 | struct hv_24x7_request_buffer *request_buffer; | |
1558 | struct hv_24x7_data_result_buffer *result_buffer; | |
1559 | ||
1560 | /* We should not be called if we are already in a txn */ | |
1561 | WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags)); | |
1562 | ||
1563 | __this_cpu_write(hv_24x7_txn_flags, flags); | |
1564 | if (flags & ~PERF_PMU_TXN_READ) | |
1565 | return; | |
1566 | ||
1567 | request_buffer = (void *)get_cpu_var(hv_24x7_reqb); | |
1568 | result_buffer = (void *)get_cpu_var(hv_24x7_resb); | |
1569 | ||
1570 | init_24x7_request(request_buffer, result_buffer); | |
1571 | ||
1572 | put_cpu_var(hv_24x7_resb); | |
1573 | put_cpu_var(hv_24x7_reqb); | |
1574 | } | |
1575 | ||
1576 | /* | |
1577 | * Clean up transaction state. | |
1578 | * | |
1579 | * NOTE: Ignore state of request and result buffers for now. | |
1580 | * We will initialize them during the next read/txn. | |
1581 | */ | |
1582 | static void reset_txn(void) | |
1583 | { | |
1584 | __this_cpu_write(hv_24x7_txn_flags, 0); | |
1585 | __this_cpu_write(hv_24x7_txn_err, 0); | |
1586 | } | |
1587 | ||
1588 | /* | |
1589 | * 24x7 counters only support READ transactions. They are always counting | |
1590 | * and dont need/support ADD transactions. Clear ->txn_flags but otherwise | |
1591 | * ignore transactions that are not of type PERF_PMU_TXN_READ. | |
1592 | * | |
1593 | * For READ transactions, submit all pending 24x7 requests (i.e requests | |
1594 | * that were queued by h_24x7_event_read()), to the hypervisor and update | |
1595 | * the event counts. | |
1596 | */ | |
1597 | static int h_24x7_event_commit_txn(struct pmu *pmu) | |
1598 | { | |
1599 | struct hv_24x7_request_buffer *request_buffer; | |
1600 | struct hv_24x7_data_result_buffer *result_buffer; | |
41f577eb | 1601 | struct hv_24x7_result *res, *next_res; |
88a48613 SB |
1602 | u64 count; |
1603 | int i, ret, txn_flags; | |
1604 | struct hv_24x7_hw *h24x7hw; | |
1605 | ||
1606 | txn_flags = __this_cpu_read(hv_24x7_txn_flags); | |
1607 | WARN_ON_ONCE(!txn_flags); | |
1608 | ||
1609 | ret = 0; | |
1610 | if (txn_flags & ~PERF_PMU_TXN_READ) | |
1611 | goto out; | |
1612 | ||
1613 | ret = __this_cpu_read(hv_24x7_txn_err); | |
1614 | if (ret) | |
1615 | goto out; | |
1616 | ||
1617 | request_buffer = (void *)get_cpu_var(hv_24x7_reqb); | |
1618 | result_buffer = (void *)get_cpu_var(hv_24x7_resb); | |
1619 | ||
1620 | ret = make_24x7_request(request_buffer, result_buffer); | |
62714a14 | 1621 | if (ret) |
88a48613 | 1622 | goto put_reqb; |
88a48613 SB |
1623 | |
1624 | h24x7hw = &get_cpu_var(hv_24x7_hw); | |
1625 | ||
41f577eb TJB |
1626 | /* Go through results in the result buffer to update event counts. */ |
1627 | for (i = 0, res = result_buffer->results; | |
1628 | i < result_buffer->num_results; i++, res = next_res) { | |
1629 | struct perf_event *event = h24x7hw->events[res->result_ix]; | |
41f577eb | 1630 | |
2e6553aa TJB |
1631 | ret = get_count_from_result(event, result_buffer, res, &count, |
1632 | &next_res); | |
1633 | if (ret) | |
1634 | break; | |
41f577eb | 1635 | |
88a48613 SB |
1636 | update_event_count(event, count); |
1637 | } | |
1638 | ||
1639 | put_cpu_var(hv_24x7_hw); | |
1640 | ||
1641 | put_reqb: | |
1642 | put_cpu_var(hv_24x7_resb); | |
1643 | put_cpu_var(hv_24x7_reqb); | |
1644 | out: | |
1645 | reset_txn(); | |
1646 | return ret; | |
1647 | } | |
1648 | ||
1649 | /* | |
1650 | * 24x7 counters only support READ transactions. They are always counting | |
1651 | * and dont need/support ADD transactions. However, regardless of type | |
1652 | * of transaction, all we need to do is cleanup, so we don't have to check | |
1653 | * the type of transaction. | |
1654 | */ | |
1655 | static void h_24x7_event_cancel_txn(struct pmu *pmu) | |
1656 | { | |
1657 | WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags)); | |
1658 | reset_txn(); | |
1659 | } | |
1660 | ||
/*
 * perf PMU descriptor for the 24x7 counters.
 *
 * .del and .stop share h_24x7_event_stop(), which takes a final reading of
 * the event.  hv_24x7_init() ORs PERF_PMU_CAP_NO_INTERRUPT into
 * .capabilities before registering the PMU.
 */
0e93a6ed CS |
1661 | static struct pmu h_24x7_pmu = { |
1662 | .task_ctx_nr = perf_invalid_context, | |
1663 | ||
1664 | .name = "hv_24x7", | |
1665 | .attr_groups = attr_groups, | |
1666 | .event_init = h_24x7_event_init, | |
1667 | .add = h_24x7_event_add, | |
1668 | .del = h_24x7_event_stop, | |
1669 | .start = h_24x7_event_start, | |
1670 | .stop = h_24x7_event_stop, | |
33ba14c0 | 1671 | .read = h_24x7_event_read, |
88a48613 SB |
1672 | .start_txn = h_24x7_event_start_txn, |
1673 | .commit_txn = h_24x7_event_commit_txn, | |
1674 | .cancel_txn = h_24x7_event_cancel_txn, | |
c2c9091d | 1675 | .capabilities = PERF_PMU_CAP_NO_EXCLUDE, |
0e93a6ed CS |
1676 | }; |
1677 | ||
1a8f0886 KJ |
1678 | static int ppc_hv_24x7_cpu_online(unsigned int cpu) |
1679 | { | |
1680 | if (cpumask_empty(&hv_24x7_cpumask)) | |
1681 | cpumask_set_cpu(cpu, &hv_24x7_cpumask); | |
1682 | ||
1683 | return 0; | |
1684 | } | |
1685 | ||
1686 | static int ppc_hv_24x7_cpu_offline(unsigned int cpu) | |
1687 | { | |
1688 | int target; | |
1689 | ||
1690 | /* Check if exiting cpu is used for collecting 24x7 events */ | |
1691 | if (!cpumask_test_and_clear_cpu(cpu, &hv_24x7_cpumask)) | |
1692 | return 0; | |
1693 | ||
1694 | /* Find a new cpu to collect 24x7 events */ | |
1695 | target = cpumask_last(cpu_active_mask); | |
1696 | ||
1697 | if (target < 0 || target >= nr_cpu_ids) { | |
1698 | pr_err("hv_24x7: CPU hotplug init failed\n"); | |
1699 | return -1; | |
1700 | } | |
1701 | ||
1702 | /* Migrate 24x7 events to the new target */ | |
1703 | cpumask_set_cpu(target, &hv_24x7_cpumask); | |
1704 | perf_pmu_migrate_context(&h_24x7_pmu, cpu, target); | |
1705 | ||
1706 | return 0; | |
1707 | } | |
1708 | ||
1709 | static int hv_24x7_cpu_hotplug_init(void) | |
1710 | { | |
1711 | return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, | |
1712 | "perf/powerpc/hv_24x7:online", | |
1713 | ppc_hv_24x7_cpu_online, | |
1714 | ppc_hv_24x7_cpu_offline); | |
1715 | } | |
1716 | ||
0e93a6ed CS |
1717 | static int hv_24x7_init(void) |
1718 | { | |
1719 | int r; | |
1720 | unsigned long hret; | |
ec3eb9d9 | 1721 | unsigned int pvr = mfspr(SPRN_PVR); |
0e93a6ed CS |
1722 | struct hv_perf_caps caps; |
1723 | ||
1724 | if (!firmware_has_feature(FW_FEATURE_LPAR)) { | |
e98bf005 | 1725 | pr_debug("not a virtualized system, not enabling\n"); |
0e93a6ed | 1726 | return -ENODEV; |
ec3eb9d9 | 1727 | } |
2e6553aa TJB |
1728 | |
1729 | /* POWER8 only supports v1, while POWER9 only supports v2. */ | |
ec3eb9d9 | 1730 | if (PVR_VER(pvr) == PVR_POWER8) |
2e6553aa | 1731 | interface_version = 1; |
bfaa7834 | 1732 | else { |
2e6553aa | 1733 | interface_version = 2; |
0e93a6ed | 1734 | |
bfaa7834 TJB |
1735 | /* SMT8 in POWER9 needs to aggregate result elements. */ |
1736 | if (threads_per_core == 8) | |
1737 | aggregate_result_elements = true; | |
1738 | } | |
1739 | ||
0e93a6ed CS |
1740 | hret = hv_perf_caps_get(&caps); |
1741 | if (hret) { | |
e98bf005 | 1742 | pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", |
0e93a6ed CS |
1743 | hret); |
1744 | return -ENODEV; | |
1745 | } | |
1746 | ||
1747 | hv_page_cache = kmem_cache_create("hv-page-4096", 4096, 4096, 0, NULL); | |
1748 | if (!hv_page_cache) | |
1749 | return -ENOMEM; | |
1750 | ||
cc56d673 VW |
1751 | /* sampling not supported */ |
1752 | h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; | |
1753 | ||
7debc970 | 1754 | r = create_events_from_catalog(&event_group.attrs, |
5c5cd7b5 CS |
1755 | &event_desc_group.attrs, |
1756 | &event_long_desc_group.attrs); | |
1757 | ||
7debc970 LZ |
1758 | if (r) |
1759 | return r; | |
1760 | ||
1a8f0886 KJ |
1761 | /* init cpuhotplug */ |
1762 | r = hv_24x7_cpu_hotplug_init(); | |
1763 | if (r) | |
1764 | return r; | |
1765 | ||
0e93a6ed CS |
1766 | r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); |
1767 | if (r) | |
1768 | return r; | |
1769 | ||
8ba21426 KJ |
1770 | read_24x7_sys_info(); |
1771 | ||
0e93a6ed CS |
1772 | return 0; |
1773 | } | |
1774 | ||
1775 | device_initcall(hv_24x7_init); |