/*
 * Hypervisor supplied "24x7" performance counter support
 *
 * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
 * Copyright 2014 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "hv-24x7: " fmt

#include <linux/perf_event.h>
#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/io.h>
#include <linux/byteorder/generic.h>

#include "hv-24x7.h"
#include "hv-24x7-catalog.h"
#include "hv-common.h"

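/*
 * Each DOMAIN(n, v, x, c) entry in hv-24x7-domains.h describes one
 * performance domain: n is the name, v its hypervisor value, x appears
 * to be the index type (chip/core/vcpu; unused below) and c whether the
 * domain is physical. The helpers that follow generate their switch
 * cases by re-including that header with a suitable DOMAIN() definition.
 */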
static const char *event_domain_suffix(unsigned domain)
{
	switch (domain) {
#define DOMAIN(n, v, x, c)		\
	case HV_PERF_DOMAIN_##n:	\
		return "__" #n;
#include "hv-24x7-domains.h"
#undef DOMAIN
	default:
		WARN(1, "unknown domain %d\n", domain);
		return "__UNKNOWN_DOMAIN_SUFFIX";
	}
}

static bool domain_is_valid(unsigned domain)
{
	switch (domain) {
#define DOMAIN(n, v, x, c)		\
	case HV_PERF_DOMAIN_##n:	\
		/* fall through */
#include "hv-24x7-domains.h"
#undef DOMAIN
		return true;
	default:
		return false;
	}
}

static bool is_physical_domain(unsigned domain)
{
	switch (domain) {
#define DOMAIN(n, v, x, c)		\
	case HV_PERF_DOMAIN_##n:	\
		return c;
#include "hv-24x7-domains.h"
#undef DOMAIN
	default:
		return false;
	}
}

static bool catalog_entry_domain_is_valid(unsigned domain)
{
	return is_physical_domain(domain);
}

/*
 * TODO: Merging events:
 * - Think of the hcall as an interface to a 4d array of counters:
 *   - x = domains
 *   - y = indexes in the domain (core, chip, vcpu, node, etc)
 *   - z = offset into the counter space
 *   - w = lpars (guest vms, "logical partitions")
 * - A single request is: x,y,y_last,z,z_last,w,w_last
 *   - this means we can retrieve a rectangle of counters in y,z for a single x.
 *
 * - Things to consider (ignoring w):
 *   - input  cost_per_request = 16
 *   - output cost_per_result(ys,zs) = 8 + 8 * ys + ys * zs
 *   - limited number of requests per hcall (must fit into 4K bytes)
 *     - 4k = 16 [buffer header] + 16 [request size] * request_count
 *     - 255 requests per hcall
 *   - sometimes it will be more efficient to read extra data and discard
 */

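/*
 * A worked example of the packing arithmetic above: a 16 byte buffer
 * header plus 16 bytes per request means 255 requests exactly fill a 4K
 * buffer (16 + 255 * 16 = 4096); this is the limit enforced in
 * add_event_to_24x7_request(). On the output side, a result covering one
 * index and one 8 byte counter (ys = 1 and, assuming zs counts bytes,
 * zs = 8) costs 8 + 8 * 1 + 1 * 8 = 24 bytes.
 */
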
/*
 * Example usage:
 * perf stat -e 'hv_24x7/domain=2,offset=8,vcpu=0,lpar=0xffffffff/'
 */

/* u3 0-6, one of HV_24X7_PERF_DOMAIN */
EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
/* u16 */
EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
/* u32, see "data_offset" */
EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
/* u16 */
EVENT_DEFINE_RANGE_FORMAT(lpar, config1, 0, 15);

EVENT_DEFINE_RANGE(reserved1, config, 4, 15);
EVENT_DEFINE_RANGE(reserved2, config1, 16, 63);
EVENT_DEFINE_RANGE(reserved3, config2, 0, 63);

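/*
 * Summary of the event encoding decoded by the ranges above (bit ranges
 * are inclusive within each 64 bit config word):
 *
 *	config:  [0:3] domain, [16:31] core/vcpu index, [32:63] offset
 *	config1: [0:15] lpar
 *
 * All remaining bits are reserved and must be zero.
 */
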
static struct attribute *format_attrs[] = {
	&format_attr_domain.attr,
	&format_attr_offset.attr,
	&format_attr_core.attr,
	&format_attr_vcpu.attr,
	&format_attr_lpar.attr,
	NULL,
};

static struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};

static struct attribute_group event_group = {
	.name = "events",
	/* .attrs is set in init */
};

static struct attribute_group event_desc_group = {
	.name = "event_descs",
	/* .attrs is set in init */
};

static struct attribute_group event_long_desc_group = {
	.name = "event_long_descs",
	/* .attrs is set in init */
};

static struct kmem_cache *hv_page_cache;

DEFINE_PER_CPU(int, hv_24x7_txn_flags);
DEFINE_PER_CPU(int, hv_24x7_txn_err);

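/*
 * During a PERF_PMU_TXN_READ transaction, events[i] remembers which
 * perf_event was queued as request i in the per-cpu request buffer, so
 * that h_24x7_event_commit_txn() can route result i back to the right
 * event. A single 4K request buffer holds at most 255 requests.
 */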
struct hv_24x7_hw {
	struct perf_event *events[255];
};

DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);

/*
 * request_buffer and result_buffer are not required to be 4k aligned,
 * but are not allowed to cross any 4k boundary. Aligning them to 4k is
 * the simplest way to ensure that.
 */
#define H24x7_DATA_BUFFER_SIZE	4096
DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);

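/*
 * Layout of the variable length tail of struct hv_24x7_event_data, as
 * decoded by the three helpers below. Each length is a 2 byte big endian
 * value that includes a 2 byte length field itself:
 *
 *	remainder[0]                   name (event_name_len - 2 bytes)
 *	remainder[event_name_len - 2]  desc_len (2 bytes)
 *	remainder[event_name_len]      desc (desc_len - 2 bytes)
 *	...followed by long_desc_len (2 bytes) and the long description.
 *
 * The helpers return pointers into this blob and report each string's
 * length with the 2 byte length prefix subtracted.
 */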
static char *event_name(struct hv_24x7_event_data *ev, int *len)
{
	*len = be16_to_cpu(ev->event_name_len) - 2;
	return (char *)ev->remainder;
}

static char *event_desc(struct hv_24x7_event_data *ev, int *len)
{
	unsigned nl = be16_to_cpu(ev->event_name_len);
	__be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);

	*len = be16_to_cpu(*desc_len) - 2;
	return (char *)ev->remainder + nl;
}

static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
{
	unsigned nl = be16_to_cpu(ev->event_name_len);
	__be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
	unsigned desc_len = be16_to_cpu(*desc_len_);
	__be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);

	*len = be16_to_cpu(*long_desc_len) - 2;
	return (char *)ev->remainder + nl + desc_len;
}

static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
					  void *end)
{
	void *start = ev;

	return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
}

/*
 * Things we don't check:
 *  - padding for desc, name, and long/detailed desc is required to be '\0'
 *    bytes.
 *
 * Return NULL if we pass end,
 * Otherwise return the address of the byte just following the event.
 */
static void *event_end(struct hv_24x7_event_data *ev, void *end)
{
	void *start = ev;
	__be16 *dl_, *ldl_;
	unsigned dl, ldl;
	unsigned nl = be16_to_cpu(ev->event_name_len);

	if (nl < 2) {
		pr_debug("%s: name length too short: %d", __func__, nl);
		return NULL;
	}

	if (start + nl > end) {
		pr_debug("%s: start=%p + nl=%u > end=%p",
			 __func__, start, nl, end);
		return NULL;
	}

	dl_ = (__be16 *)(ev->remainder + nl - 2);
	if (!IS_ALIGNED((uintptr_t)dl_, 2))
		pr_warn("desc len not aligned %p", dl_);
	dl = be16_to_cpu(*dl_);
	if (dl < 2) {
		pr_debug("%s: desc len too short: %d", __func__, dl);
		return NULL;
	}

	if (start + nl + dl > end) {
		pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p",
			 __func__, start, nl, dl, start + nl + dl, end);
		return NULL;
	}

	ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
	if (!IS_ALIGNED((uintptr_t)ldl_, 2))
		pr_warn("long desc len not aligned %p", ldl_);
	ldl = be16_to_cpu(*ldl_);
	if (ldl < 2) {
		pr_debug("%s: long desc len too short (ldl=%u)",
			 __func__, ldl);
		return NULL;
	}

	if (start + nl + dl + ldl > end) {
		pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p",
			 __func__, start, nl, dl, ldl, end);
		return NULL;
	}

	return start + nl + dl + ldl;
}

static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
					      unsigned long version,
					      unsigned long index)
{
	pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
		 phys_4096, version, index);

	WARN_ON(!IS_ALIGNED(phys_4096, 4096));

	return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
				  phys_4096, version, index);
}

static unsigned long h_get_24x7_catalog_page(char page[],
					     u64 version, u32 index)
{
	return h_get_24x7_catalog_page_(virt_to_phys(page),
					version, index);
}

static unsigned core_domains[] = {
	HV_PERF_DOMAIN_PHYS_CORE,
	HV_PERF_DOMAIN_VCPU_HOME_CORE,
	HV_PERF_DOMAIN_VCPU_HOME_CHIP,
	HV_PERF_DOMAIN_VCPU_HOME_NODE,
	HV_PERF_DOMAIN_VCPU_REMOTE_NODE,
};
/* chip event data always yields a single event, core yields multiple */
#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains)

static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
{
	const char *sindex;
	const char *lpar;

	if (is_physical_domain(domain)) {
		lpar = "0x0";
		sindex = "core";
	} else {
		lpar = "?";
		sindex = "vcpu";
	}

	return kasprintf(GFP_KERNEL,
			 "domain=0x%x,offset=0x%x,%s=?,lpar=%s",
			 domain,
			 be16_to_cpu(event->event_counter_offs) +
			 be16_to_cpu(event->event_group_record_offs),
			 sindex,
			 lpar);
}

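/*
 * An event_fmt() example: a physical core event whose counter lives at
 * offset 0x8 would be published to sysfs as (domain value assumed here
 * for illustration) "domain=0x2,offset=0x8,core=?,lpar=0x0"; the user
 * supplies the "?" fields on the perf command line.
 */
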
/* Avoid trusting fw to NUL terminate strings */
static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
{
	return kasprintf(gfp, "%.*s", max_len, maybe_str);
}

static ssize_t device_show_string(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	struct dev_ext_attribute *d;

	d = container_of(attr, struct dev_ext_attribute, attr);

	return sprintf(buf, "%s\n", (char *)d->var);
}

static struct attribute *device_str_attr_create_(char *name, char *str)
{
	struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);

	if (!attr)
		return NULL;

	sysfs_attr_init(&attr->attr.attr);

	attr->var = str;
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = device_show_string;

	return &attr->attr.attr;
}

static struct attribute *device_str_attr_create(char *name, int name_max,
						int name_nonce,
						char *str, size_t str_max)
{
	char *n;
	char *s = memdup_to_str(str, str_max, GFP_KERNEL);
	struct attribute *a;

	if (!s)
		return NULL;

	if (!name_nonce)
		n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
	else
		n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
			      name_nonce);
	if (!n)
		goto out_s;

	a = device_str_attr_create_(n, s);
	if (!a)
		goto out_n;

	return a;
out_n:
	kfree(n);
out_s:
	kfree(s);
	return NULL;
}

static void device_str_attr_destroy(struct attribute *attr)
{
	struct dev_ext_attribute *d;

	d = container_of(attr, struct dev_ext_attribute, attr.attr);
	kfree(d->var);
	kfree(d->attr.attr.name);
	kfree(d);
}

static struct attribute *event_to_attr(unsigned ix,
				       struct hv_24x7_event_data *event,
				       unsigned domain,
				       int nonce)
{
	int event_name_len;
	char *ev_name, *a_ev_name, *val;
	const char *ev_suffix;
	struct attribute *attr;

	if (!domain_is_valid(domain)) {
		pr_warn("catalog event %u has invalid domain %u\n",
			ix, domain);
		return NULL;
	}

	val = event_fmt(event, domain);
	if (!val)
		return NULL;

	ev_suffix = event_domain_suffix(domain);
	ev_name = event_name(event, &event_name_len);
	if (!nonce)
		a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s",
				      (int)event_name_len, ev_name, ev_suffix);
	else
		a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d",
				      (int)event_name_len, ev_name, ev_suffix, nonce);

	if (!a_ev_name)
		goto out_val;

	attr = device_str_attr_create_(a_ev_name, val);
	if (!attr)
		goto out_name;

	return attr;
out_name:
	kfree(a_ev_name);
out_val:
	kfree(val);
	return NULL;
}

static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
					    int nonce)
{
	int nl, dl;
	char *name = event_name(event, &nl);
	char *desc = event_desc(event, &dl);

	/* If there isn't a description, don't create the sysfs file */
	if (!dl)
		return NULL;

	return device_str_attr_create(name, nl, nonce, desc, dl);
}

static struct attribute *
event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
{
	int nl, dl;
	char *name = event_name(event, &nl);
	char *desc = event_long_desc(event, &dl);

	/* If there isn't a description, don't create the sysfs file */
	if (!dl)
		return NULL;

	return device_str_attr_create(name, nl, nonce, desc, dl);
}

static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs,
				   struct hv_24x7_event_data *event, int nonce)
{
	unsigned i;

	switch (event->domain) {
	case HV_PERF_DOMAIN_PHYS_CHIP:
		*attrs = event_to_attr(ix, event, event->domain, nonce);
		return 1;
	case HV_PERF_DOMAIN_PHYS_CORE:
		for (i = 0; i < ARRAY_SIZE(core_domains); i++) {
			attrs[i] = event_to_attr(ix, event, core_domains[i],
						 nonce);
			if (!attrs[i]) {
				pr_warn("catalog event %u: individual attr %u creation failure\n",
					ix, i);
				for (; i; i--)
					device_str_attr_destroy(attrs[i - 1]);
				return -1;
			}
		}
		return i;
	default:
		pr_warn("catalog event %u: domain %u is not allowed in the catalog\n",
			ix, event->domain);
		return -1;
	}
}

static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
{
	switch (event->domain) {
	case HV_PERF_DOMAIN_PHYS_CHIP:
		return 1;
	case HV_PERF_DOMAIN_PHYS_CORE:
		return ARRAY_SIZE(core_domains);
	default:
		return 0;
	}
}

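/*
 * The hypervisor catalog can list the same event name more than once
 * (e.g. once per domain). Track the names already seen in an rbtree and
 * hand out a small "nonce" for each repeat so that the generated sysfs
 * attribute names stay unique.
 */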
struct event_uniq {
	struct rb_node node;
	const char *name;
	int nl;
	unsigned ct;
	unsigned domain;
};

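/*
 * Order two memory regions first by length, then by content. This is
 * only used to impose a total order on the rbtree keys; which way the
 * ordering runs is arbitrary.
 */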
static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
{
	if (s1 < s2)
		return 1;
	if (s1 > s2)
		return -1;

	return memcmp(d1, d2, s1);
}

static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
		       size_t s2, unsigned d2)
{
	int r = memord(v1, s1, v2, s2);

	if (r)
		return r;
	if (d1 > d2)
		return 1;
	if (d2 > d1)
		return -1;
	return 0;
}

static int event_uniq_add(struct rb_root *root, const char *name, int nl,
			  unsigned domain)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;
	struct event_uniq *data;

	/* Figure out where to put new node */
	while (*new) {
		struct event_uniq *it;
		int result;

		it = container_of(*new, struct event_uniq, node);
		result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
				     it->domain);

		parent = *new;
		if (result < 0)
			new = &((*new)->rb_left);
		else if (result > 0)
			new = &((*new)->rb_right);
		else {
			it->ct++;
			pr_info("found a duplicate event %.*s, ct=%u\n", nl,
				name, it->ct);
			return it->ct;
		}
	}

	data = kmalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	*data = (struct event_uniq) {
		.name = name,
		.nl = nl,
		.ct = 0,
		.domain = domain,
	};

	/* Add new node and rebalance tree. */
	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);

	/* data->ct */
	return 0;
}

static void event_uniq_destroy(struct rb_root *root)
{
	/*
	 * the strings we point to are in the giant block of memory filled by
	 * the catalog, and are freed separately.
	 */
	struct event_uniq *pos, *n;

	rbtree_postorder_for_each_entry_safe(pos, n, root, node)
		kfree(pos);
}


/*
 * ensure the event structure's sizes are self consistent and don't cause us to
 * read outside of the event
 *
 * On success, return the event length in bytes.
 * Otherwise, return -1 (and print as appropriate).
 */
static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
					  size_t event_idx,
					  size_t event_data_bytes,
					  size_t event_entry_count,
					  size_t offset, void *end)
{
	ssize_t ev_len;
	void *ev_end, *calc_ev_end;

	if (offset >= event_data_bytes)
		return -1;

	if (event_idx >= event_entry_count) {
		pr_devel("catalog event data has %zu bytes of padding after last event\n",
			 event_data_bytes - offset);
		return -1;
	}

	if (!event_fixed_portion_is_within(event, end)) {
		pr_warn("event %zu fixed portion is not within range\n",
			event_idx);
		return -1;
	}

	ev_len = be16_to_cpu(event->length);

	if (ev_len % 16)
		pr_info("event %zu has length %zu not divisible by 16: event=%pK\n",
			event_idx, ev_len, event);

	ev_end = (__u8 *)event + ev_len;
	if (ev_end > end) {
		pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n",
			event_idx, ev_len, ev_end, end,
			offset);
		return -1;
	}

	calc_ev_end = event_end(event, end);
	if (!calc_ev_end) {
		pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n",
			event_idx, event_data_bytes, event, end,
			offset);
		return -1;
	}

	if (calc_ev_end > ev_end) {
		pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
			event_idx, event, ev_end, offset, calc_ev_end);
		return -1;
	}

	return ev_len;
}

#define MAX_4K (SIZE_MAX / 4096)

static int create_events_from_catalog(struct attribute ***events_,
				      struct attribute ***event_descs_,
				      struct attribute ***event_long_descs_)
{
	unsigned long hret;
	size_t catalog_len, catalog_page_len, event_entry_count,
	       event_data_len, event_data_offs,
	       event_data_bytes, junk_events, event_idx, event_attr_ct, i,
	       attr_max, event_idx_last, desc_ct, long_desc_ct;
	ssize_t ct, ev_len;
	uint32_t catalog_version_num;
	struct attribute **events, **event_descs, **event_long_descs;
	struct hv_24x7_catalog_page_0 *page_0 =
		kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
	void *page = page_0;
	void *event_data, *end;
	struct hv_24x7_event_data *event;
	struct rb_root ev_uniq = RB_ROOT;
	int ret = 0;

	if (!page) {
		ret = -ENOMEM;
		goto e_out;
	}

	hret = h_get_24x7_catalog_page(page, 0, 0);
	if (hret) {
		ret = -EIO;
		goto e_free;
	}

	catalog_version_num = be64_to_cpu(page_0->version);
	catalog_page_len = be32_to_cpu(page_0->length);

	if (MAX_4K < catalog_page_len) {
		pr_err("invalid page count: %zu\n", catalog_page_len);
		ret = -EIO;
		goto e_free;
	}

	catalog_len = catalog_page_len * 4096;

	event_entry_count = be16_to_cpu(page_0->event_entry_count);
	event_data_offs = be16_to_cpu(page_0->event_data_offs);
	event_data_len = be16_to_cpu(page_0->event_data_len);

	pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n",
		 (size_t)catalog_version_num, catalog_len,
		 event_entry_count, event_data_offs, event_data_len);

	if ((MAX_4K < event_data_len)
			|| (MAX_4K < event_data_offs)
			|| (MAX_4K - event_data_offs < event_data_len)) {
		pr_err("invalid event data offs %zu and/or len %zu\n",
		       event_data_offs, event_data_len);
		ret = -EIO;
		goto e_free;
	}

	if ((event_data_offs + event_data_len) > catalog_page_len) {
		pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
		       event_data_offs,
		       event_data_offs + event_data_len,
		       catalog_page_len);
		ret = -EIO;
		goto e_free;
	}

	if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) {
		pr_err("event_entry_count %zu is invalid\n",
		       event_entry_count);
		ret = -EIO;
		goto e_free;
	}

	event_data_bytes = event_data_len * 4096;

	/*
	 * event data can span several pages, events can cross between these
	 * pages. Use vmalloc to make this easier.
	 */
	event_data = vmalloc(event_data_bytes);
	if (!event_data) {
		pr_err("could not allocate event data\n");
		ret = -ENOMEM;
		goto e_free;
	}

	end = event_data + event_data_bytes;

	/*
	 * using vmalloc_to_phys() like this only works if PAGE_SIZE is
	 * divisible by 4096
	 */
	BUILD_BUG_ON(PAGE_SIZE % 4096);

	for (i = 0; i < event_data_len; i++) {
		hret = h_get_24x7_catalog_page_(
				vmalloc_to_phys(event_data + i * 4096),
				catalog_version_num,
				i + event_data_offs);
		if (hret) {
			pr_err("failed to get event data in page %zu\n",
			       i + event_data_offs);
			ret = -EIO;
			goto e_event_data;
		}
	}

	/*
	 * scan the catalog to determine the number of attributes we need, and
	 * verify it at the same time.
	 */
	for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
	     ;
	     event_idx++, event = (void *)event + ev_len) {
		size_t offset = (void *)event - (void *)event_data;
		char *name;
		int nl;

		ev_len = catalog_event_len_validate(event, event_idx,
						    event_data_bytes,
						    event_entry_count,
						    offset, end);
		if (ev_len < 0)
			break;

		name = event_name(event, &nl);

		if (event->event_group_record_len == 0) {
			pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
				 event_idx, nl, name);
			junk_events++;
			continue;
		}

		if (!catalog_entry_domain_is_valid(event->domain)) {
			pr_info("event %zu (%.*s) has invalid domain %d\n",
				event_idx, nl, name, event->domain);
			junk_events++;
			continue;
		}

		attr_max += event_to_attr_ct(event);
	}

	event_idx_last = event_idx;
	if (event_idx_last != event_entry_count)
		pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
			event_idx_last, event_entry_count, junk_events);

	events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
	if (!events) {
		ret = -ENOMEM;
		goto e_event_data;
	}

	event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
				    GFP_KERNEL);
	if (!event_descs) {
		ret = -ENOMEM;
		goto e_event_attrs;
	}

	event_long_descs = kmalloc_array(event_idx + 1,
					 sizeof(*event_long_descs), GFP_KERNEL);
	if (!event_long_descs) {
		ret = -ENOMEM;
		goto e_event_descs;
	}

	/* Iterate over the catalog filling in the attribute vector */
	for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
	     event = event_data, event_idx = 0;
	     event_idx < event_idx_last;
	     event_idx++, ev_len = be16_to_cpu(event->length),
	     event = (void *)event + ev_len) {
		char *name;
		int nl;
		int nonce;
		/*
		 * these are the only "bad" events that are intermixed and that
		 * we can ignore without issue. make sure to skip them here
		 */
		if (event->event_group_record_len == 0)
			continue;
		if (!catalog_entry_domain_is_valid(event->domain))
			continue;

		name = event_name(event, &nl);
		nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
		ct = event_data_to_attrs(event_idx, events + event_attr_ct,
					 event, nonce);
		if (ct <= 0) {
			pr_warn("event %zu (%.*s) creation failure, skipping\n",
				event_idx, nl, name);
			junk_events++;
		} else {
			event_attr_ct += ct;
			event_descs[desc_ct] = event_to_desc_attr(event, nonce);
			if (event_descs[desc_ct])
				desc_ct++;
			event_long_descs[long_desc_ct] =
					event_to_long_desc_attr(event, nonce);
			if (event_long_descs[long_desc_ct])
				long_desc_ct++;
		}
	}

	pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
		event_idx, event_attr_ct, junk_events, desc_ct);

	events[event_attr_ct] = NULL;
	event_descs[desc_ct] = NULL;
	event_long_descs[long_desc_ct] = NULL;

	event_uniq_destroy(&ev_uniq);
	vfree(event_data);
	kmem_cache_free(hv_page_cache, page);

	*events_ = events;
	*event_descs_ = event_descs;
	*event_long_descs_ = event_long_descs;
	return 0;

e_event_descs:
	kfree(event_descs);
e_event_attrs:
	kfree(events);
e_event_data:
	vfree(event_data);
e_free:
	kmem_cache_free(hv_page_cache, page);
e_out:
	*events_ = NULL;
	*event_descs_ = NULL;
	*event_long_descs_ = NULL;
	return ret;
}

static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
			    struct bin_attribute *bin_attr, char *buf,
			    loff_t offset, size_t count)
{
	unsigned long hret;
	ssize_t ret = 0;
	size_t catalog_len = 0, catalog_page_len = 0;
	loff_t page_offset = 0;
	loff_t offset_in_page;
	size_t copy_len;
	uint64_t catalog_version_num = 0;
	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
	struct hv_24x7_catalog_page_0 *page_0 = page;

	if (!page)
		return -ENOMEM;

	hret = h_get_24x7_catalog_page(page, 0, 0);
	if (hret) {
		ret = -EIO;
		goto e_free;
	}

	catalog_version_num = be64_to_cpu(page_0->version);
	catalog_page_len = be32_to_cpu(page_0->length);
	catalog_len = catalog_page_len * 4096;

	page_offset = offset / 4096;
	offset_in_page = offset % 4096;

	if (page_offset >= catalog_page_len)
		goto e_free;

	if (page_offset != 0) {
		hret = h_get_24x7_catalog_page(page, catalog_version_num,
					       page_offset);
		if (hret) {
			ret = -EIO;
			goto e_free;
		}
	}

	copy_len = 4096 - offset_in_page;
	if (copy_len > count)
		copy_len = count;

	memcpy(buf, page + offset_in_page, copy_len);
	ret = copy_len;

e_free:
	if (hret)
		pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed: rc=%ld\n",
		       catalog_version_num, page_offset, hret);
	kmem_cache_free(hv_page_cache, page);

	pr_devel("catalog_read: offset=%lld(%lld) count=%zu catalog_len=%zu(%zu) => %zd\n",
		 offset, page_offset, count, catalog_len, catalog_page_len,
		 ret);

	return ret;
}

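/*
 * The raw catalog is exposed read-only through the "interface/catalog"
 * bin attribute defined below. From userspace it typically appears as
 * /sys/devices/hv_24x7/interface/catalog (the path may vary by kernel),
 * so the whole catalog can be captured with e.g.:
 *
 *	dd if=/sys/devices/hv_24x7/interface/catalog of=24x7_catalog.bin
 */
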
#define PAGE_0_ATTR(_name, _fmt, _expr)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *dev_attr,	\
			    char *buf)				\
{								\
	unsigned long hret;					\
	ssize_t ret = 0;					\
	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);	\
	struct hv_24x7_catalog_page_0 *page_0 = page;		\
	if (!page)						\
		return -ENOMEM;					\
	hret = h_get_24x7_catalog_page(page, 0, 0);		\
	if (hret) {						\
		ret = -EIO;					\
		goto e_free;					\
	}							\
	ret = sprintf(buf, _fmt, _expr);			\
e_free:								\
	kmem_cache_free(hv_page_cache, page);			\
	return ret;						\
}								\
static DEVICE_ATTR_RO(_name)

PAGE_0_ATTR(catalog_version, "%lld\n",
	    (unsigned long long)be64_to_cpu(page_0->version));
PAGE_0_ATTR(catalog_len, "%lld\n",
	    (unsigned long long)be32_to_cpu(page_0->length) * 4096);
static BIN_ATTR_RO(catalog, 0 /* real length varies */);

static struct bin_attribute *if_bin_attrs[] = {
	&bin_attr_catalog,
	NULL,
};

static struct attribute *if_attrs[] = {
	&dev_attr_catalog_len.attr,
	&dev_attr_catalog_version.attr,
	NULL,
};

static struct attribute_group if_group = {
	.name = "interface",
	.bin_attrs = if_bin_attrs,
	.attrs = if_attrs,
};

static const struct attribute_group *attr_groups[] = {
	&format_group,
	&event_group,
	&event_desc_group,
	&event_long_desc_group,
	&if_group,
	NULL,
};

static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
			   struct hv_24x7_data_result_buffer *result_buffer,
			   unsigned long ret)
{
	struct hv_24x7_request *req;

	req = &request_buffer->requests[0];
	pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
			      req->performance_domain, req->data_offset,
			      req->starting_ix, req->starting_lpar_ix, ret, ret,
			      result_buffer->detailed_rc,
			      result_buffer->failing_request_ix);
}

/*
 * Start the process for a new H_GET_24x7_DATA hcall.
 */
static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
			      struct hv_24x7_data_result_buffer *result_buffer)
{
	memset(request_buffer, 0, 4096);
	memset(result_buffer, 0, 4096);

	request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
	/* memset above set request_buffer->num_requests to 0 */
}

/*
 * Commit (i.e. perform) the H_GET_24x7_DATA hcall using the data collected
 * by 'init_24x7_request()' and 'add_event_to_24x7_request()'.
 */
static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
			     struct hv_24x7_data_result_buffer *result_buffer)
{
	unsigned long ret;

	/*
	 * NOTE: Due to variable number of array elements in request and
	 *	 result buffer(s), sizeof() is not reliable. Use the actual
	 *	 allocated buffer size, H24x7_DATA_BUFFER_SIZE.
	 */
	ret = plpar_hcall_norets(H_GET_24X7_DATA,
				 virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
				 virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE);

	if (ret)
		log_24x7_hcall(request_buffer, result_buffer, ret);

	return ret;
}

/*
 * Add the given @event to the next slot in the 24x7 request_buffer.
 *
 * Note that H_GET_24X7_DATA hcall allows reading several counters'
 * values in a single HCALL. We expect the caller to add events to the
 * request buffer one by one, make the HCALL and process the results.
 */
static int add_event_to_24x7_request(struct perf_event *event,
				     struct hv_24x7_request_buffer *request_buffer)
{
	u16 idx;
	int i;
	struct hv_24x7_request *req;

	if (request_buffer->num_requests > 254) {
		pr_devel("Too many requests for 24x7 HCALL %d\n",
			 request_buffer->num_requests);
		return -EINVAL;
	}

	if (is_physical_domain(event_get_domain(event)))
		idx = event_get_core(event);
	else
		idx = event_get_vcpu(event);

	i = request_buffer->num_requests++;
	req = &request_buffer->requests[i];

	req->performance_domain = event_get_domain(event);
	req->data_size = cpu_to_be16(8);
	req->data_offset = cpu_to_be32(event_get_offset(event));
	req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
	req->max_num_lpars = cpu_to_be16(1);
	req->starting_ix = cpu_to_be16(idx);
	req->max_ix = cpu_to_be16(1);

	return 0;
}

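/*
 * A single counter read is just the three helpers above chained
 * together, as done by single_24x7_request() below:
 *
 *	init_24x7_request(request_buffer, result_buffer);
 *	add_event_to_24x7_request(event, request_buffer);
 *	make_24x7_request(request_buffer, result_buffer);
 *	count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]);
 */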
static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
{
	unsigned long ret;
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_data_result_buffer *result_buffer;

	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);

	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
	result_buffer = (void *)get_cpu_var(hv_24x7_resb);

	init_24x7_request(request_buffer, result_buffer);

	ret = add_event_to_24x7_request(event, request_buffer);
	if (ret)
		goto out;

	ret = make_24x7_request(request_buffer, result_buffer);
	if (ret) {
		log_24x7_hcall(request_buffer, result_buffer, ret);
		goto out;
	}

	/* process result from hcall */
	*count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]);

out:
	put_cpu_var(hv_24x7_reqb);
	put_cpu_var(hv_24x7_resb);
	return ret;
}


static int h_24x7_event_init(struct perf_event *event)
{
	struct hv_perf_caps caps;
	unsigned domain;
	unsigned long hret;
	u64 ct;

	/* Not our event */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* Unused areas must be 0 */
	if (event_get_reserved1(event) ||
	    event_get_reserved2(event) ||
	    event_get_reserved3(event)) {
		pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n",
			 event->attr.config,
			 event_get_reserved1(event),
			 event->attr.config1,
			 event_get_reserved2(event),
			 event->attr.config2,
			 event_get_reserved3(event));
		return -EINVAL;
	}

	/* unsupported modes and filters */
	if (event->attr.exclude_user ||
	    event->attr.exclude_kernel ||
	    event->attr.exclude_hv ||
	    event->attr.exclude_idle ||
	    event->attr.exclude_host ||
	    event->attr.exclude_guest)
		return -EINVAL;

	/* no branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	/* offset must be 8 byte aligned */
	if (event_get_offset(event) % 8) {
		pr_devel("bad alignment\n");
		return -EINVAL;
	}

	/* Domains above 6 are invalid */
	domain = event_get_domain(event);
	if (domain > 6) {
		pr_devel("invalid domain %d\n", domain);
		return -EINVAL;
	}

	hret = hv_perf_caps_get(&caps);
	if (hret) {
		pr_devel("could not get capabilities: rc=%ld\n", hret);
		return -EIO;
	}

	/* Physical domains & other lpars require extra capabilities */
	if (!caps.collect_privileged && (is_physical_domain(domain) ||
		(event_get_lpar(event) != event_get_lpar_max()))) {
		pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
			 is_physical_domain(domain),
			 event_get_lpar(event));
		return -EACCES;
	}

	/* see if the event complains */
	if (single_24x7_request(event, &ct)) {
		pr_devel("test hcall failed\n");
		return -EIO;
	}

	return 0;
}

static u64 h_24x7_get_value(struct perf_event *event)
{
	unsigned long ret;
	u64 ct;

	ret = single_24x7_request(event, &ct);
	if (ret)
		/* We checked this in event init, shouldn't fail here... */
		return 0;

	return ct;
}

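/*
 * 24x7 counters are free running, so perf counts are kept as deltas:
 * remember the raw counter value from the previous read in hw.prev_count
 * and accumulate only the difference into event->count.
 */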
static void update_event_count(struct perf_event *event, u64 now)
{
	s64 prev;

	prev = local64_xchg(&event->hw.prev_count, now);
	local64_add(now - prev, &event->count);
}

static void h_24x7_event_read(struct perf_event *event)
{
	u64 now;
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_hw *h24x7hw;
	int txn_flags;

	txn_flags = __this_cpu_read(hv_24x7_txn_flags);

	/*
	 * If in a READ transaction, add this counter to the list of
	 * counters to read during the next HCALL (i.e. commit_txn()).
	 * If not in a READ transaction, go ahead and make the HCALL
	 * to read this counter by itself.
	 */

	if (txn_flags & PERF_PMU_TXN_READ) {
		int i;
		int ret;

		if (__this_cpu_read(hv_24x7_txn_err))
			return;

		request_buffer = (void *)get_cpu_var(hv_24x7_reqb);

		ret = add_event_to_24x7_request(event, request_buffer);
		if (ret) {
			__this_cpu_write(hv_24x7_txn_err, ret);
		} else {
			/*
			 * Associate the event with the HCALL request index,
			 * so ->commit_txn() can quickly find/update count.
			 */
			i = request_buffer->num_requests - 1;

			h24x7hw = &get_cpu_var(hv_24x7_hw);
			h24x7hw->events[i] = event;
			put_cpu_var(hv_24x7_hw);
		}

		put_cpu_var(hv_24x7_reqb);
	} else {
		now = h_24x7_get_value(event);
		update_event_count(event, now);
	}
}

static void h_24x7_event_start(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_RELOAD)
		local64_set(&event->hw.prev_count, h_24x7_get_value(event));
}

static void h_24x7_event_stop(struct perf_event *event, int flags)
{
	h_24x7_event_read(event);
}

static int h_24x7_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		h_24x7_event_start(event, flags);

	return 0;
}

/*
 * 24x7 counters only support READ transactions. They are
 * always counting and don't need/support ADD transactions.
 * Cache the flags, but otherwise ignore transactions that
 * are not PERF_PMU_TXN_READ.
 */
static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
{
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_data_result_buffer *result_buffer;

	/* We should not be called if we are already in a txn */
	WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));

	__this_cpu_write(hv_24x7_txn_flags, flags);
	if (flags & ~PERF_PMU_TXN_READ)
		return;

	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
	result_buffer = (void *)get_cpu_var(hv_24x7_resb);

	init_24x7_request(request_buffer, result_buffer);

	put_cpu_var(hv_24x7_resb);
	put_cpu_var(hv_24x7_reqb);
}

/*
 * Clean up transaction state.
 *
 * NOTE: Ignore state of request and result buffers for now.
 *	 We will initialize them during the next read/txn.
 */
static void reset_txn(void)
{
	__this_cpu_write(hv_24x7_txn_flags, 0);
	__this_cpu_write(hv_24x7_txn_err, 0);
}

/*
 * 24x7 counters only support READ transactions. They are always counting
 * and don't need/support ADD transactions. Clear ->txn_flags but otherwise
 * ignore transactions that are not of type PERF_PMU_TXN_READ.
 *
 * For READ transactions, submit all pending 24x7 requests (i.e. requests
 * that were queued by h_24x7_event_read()), to the hypervisor and update
 * the event counts.
 */
static int h_24x7_event_commit_txn(struct pmu *pmu)
{
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_data_result_buffer *result_buffer;
	struct hv_24x7_result *resb;
	struct perf_event *event;
	u64 count;
	int i, ret, txn_flags;
	struct hv_24x7_hw *h24x7hw;

	txn_flags = __this_cpu_read(hv_24x7_txn_flags);
	WARN_ON_ONCE(!txn_flags);

	ret = 0;
	if (txn_flags & ~PERF_PMU_TXN_READ)
		goto out;

	ret = __this_cpu_read(hv_24x7_txn_err);
	if (ret)
		goto out;

	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
	result_buffer = (void *)get_cpu_var(hv_24x7_resb);

	ret = make_24x7_request(request_buffer, result_buffer);
	if (ret) {
		log_24x7_hcall(request_buffer, result_buffer, ret);
		goto put_reqb;
	}

	h24x7hw = &get_cpu_var(hv_24x7_hw);

	/* Update event counts from hcall */
	for (i = 0; i < request_buffer->num_requests; i++) {
		resb = &result_buffer->results[i];
		count = be64_to_cpu(resb->elements[0].element_data[0]);
		event = h24x7hw->events[i];
		h24x7hw->events[i] = NULL;
		update_event_count(event, count);
	}

	put_cpu_var(hv_24x7_hw);

put_reqb:
	put_cpu_var(hv_24x7_resb);
	put_cpu_var(hv_24x7_reqb);
out:
	reset_txn();
	return ret;
}

/*
 * 24x7 counters only support READ transactions. They are always counting
 * and don't need/support ADD transactions. However, regardless of type
 * of transaction, all we need to do is cleanup, so we don't have to check
 * the type of transaction.
 */
static void h_24x7_event_cancel_txn(struct pmu *pmu)
{
	WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
	reset_txn();
}

static struct pmu h_24x7_pmu = {
	.task_ctx_nr = perf_invalid_context,

	.name = "hv_24x7",
	.attr_groups = attr_groups,
	.event_init = h_24x7_event_init,
	.add = h_24x7_event_add,
	.del = h_24x7_event_stop,
	.start = h_24x7_event_start,
	.stop = h_24x7_event_stop,
	.read = h_24x7_event_read,
	.start_txn = h_24x7_event_start_txn,
	.commit_txn = h_24x7_event_commit_txn,
	.cancel_txn = h_24x7_event_cancel_txn,
};

static int hv_24x7_init(void)
{
	int r;
	unsigned long hret;
	struct hv_perf_caps caps;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		pr_debug("not a virtualized system, not enabling\n");
		return -ENODEV;
	}

	hret = hv_perf_caps_get(&caps);
	if (hret) {
		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
			 hret);
		return -ENODEV;
	}

	hv_page_cache = kmem_cache_create("hv-page-4096", 4096, 4096, 0, NULL);
	if (!hv_page_cache)
		return -ENOMEM;

	/* sampling not supported */
	h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

	r = create_events_from_catalog(&event_group.attrs,
				       &event_desc_group.attrs,
				       &event_long_desc_group.attrs);

	if (r)
		return r;

	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
	if (r)
		return r;

	return 0;
}

device_initcall(hv_24x7_init);