Commit | Line | Data |
---|---|---|
a17ae4c3 | 1 | // SPDX-License-Identifier: GPL-2.0 |
8c069ff4 HB |
2 | /* |
3 | * Performance event support for the System z CPU-measurement Sampling Facility | |
4 | * | |
5 | * Copyright IBM Corp. 2013 | |
6 | * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> | |
8c069ff4 HB |
7 | */ |
8 | #define KMSG_COMPONENT "cpum_sf" | |
9 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
10 | ||
11 | #include <linux/kernel.h> | |
12 | #include <linux/kernel_stat.h> | |
13 | #include <linux/perf_event.h> | |
14 | #include <linux/percpu.h> | |
544e8dd7 | 15 | #include <linux/pid.h> |
8c069ff4 HB |
16 | #include <linux/notifier.h> |
17 | #include <linux/export.h> | |
7e75fc3f | 18 | #include <linux/slab.h> |
69f239ed HB |
19 | #include <linux/mm.h> |
20 | #include <linux/moduleparam.h> | |
8c069ff4 HB |
21 | #include <asm/cpu_mf.h> |
22 | #include <asm/irq.h> | |
23 | #include <asm/debug.h> | |
24 | #include <asm/timex.h> | |
25 | ||
/*
 * Lower bound for the number of sample-data-block-tables (SDBT).
 * The sampling buffer structure needs at least one table; one table
 * holds up to 511 pointers to sample-data-blocks.
 */
#define CPUM_SF_MIN_SDBT	1
8c069ff4 | 31 | |
/*
 * Capacity of one sample-data-block-table (SDBT), in SDB pointers.
 * Each table is one page of 8-byte entries; the final entry is reserved
 * for the table-link that points to the origin of the next SDBT.
 */
#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
8c069ff4 | 37 | |
69f239ed HB |
38 | /* Maximum page offset for an SDBT table-link entry: |
39 | * If this page offset is reached, a table-link entry to the next SDBT | |
40 | * must be added. | |
41 | */ | |
42 | #define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) | |
43 | static inline int require_table_link(const void *sdbt) | |
44 | { | |
45 | return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; | |
46 | } | |
47 | ||
48 | /* Minimum and maximum sampling buffer sizes: | |
49 | * | |
7e75fc3f HB |
50 | * This number represents the maximum size of the sampling buffer taking |
51 | * the number of sample-data-block-tables into account. Note that these | |
52 | * numbers apply to the basic-sampling function only. | |
53 | * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if | |
54 | * the diagnostic-sampling function is active. | |
8c069ff4 | 55 | * |
69f239ed HB |
56 | * Sampling buffer size Buffer characteristics |
57 | * --------------------------------------------------- | |
58 | * 64KB == 16 pages (4KB per page) | |
59 | * 1 page for SDB-tables | |
60 | * 15 pages for SDBs | |
61 | * | |
62 | * 32MB == 8192 pages (4KB per page) | |
63 | * 16 pages for SDB-tables | |
64 | * 8176 pages for SDBs | |
8c069ff4 | 65 | */ |
69f239ed HB |
66 | static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; |
67 | static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; | |
7e75fc3f | 68 | static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; |
8c069ff4 HB |
69 | |
/* Bookkeeping for one sampling buffer (a ring of linked SDB-tables) */
struct sf_buffer {
	unsigned long *sdbt;	    /* Origin of the first SDB-table */
	/* Sizes, needed when the buffer is extended later on */
	unsigned long num_sdb;	    /* Count of sample-data-blocks */
	unsigned long num_sdbt;	    /* Count of sample-data-block-tables */
	unsigned long *tail;	    /* Table-link entry of the last SDB-table */
};
77 | ||
ca5955cd PH |
78 | struct aux_buffer { |
79 | struct sf_buffer sfb; | |
80 | unsigned long head; /* index of SDB of buffer head */ | |
81 | unsigned long alert_mark; /* index of SDB of alert request position */ | |
82 | unsigned long empty_mark; /* mark of SDB not marked full */ | |
83 | unsigned long *sdb_index; /* SDB address for fast lookup */ | |
84 | unsigned long *sdbt_index; /* SDBT address for fast lookup */ | |
85 | }; | |
86 | ||
8c069ff4 HB |
87 | struct cpu_hw_sf { |
88 | /* CPU-measurement sampling information block */ | |
89 | struct hws_qsi_info_block qsi; | |
69f239ed | 90 | /* CPU-measurement sampling control block */ |
8c069ff4 HB |
91 | struct hws_lsctl_request_block lsctl; |
92 | struct sf_buffer sfb; /* Sampling buffer */ | |
93 | unsigned int flags; /* Status flags */ | |
94 | struct perf_event *event; /* Scheduled perf event */ | |
ca5955cd | 95 | struct perf_output_handle handle; /* AUX buffer output handle */ |
8c069ff4 HB |
96 | }; |
97 | static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); | |
98 | ||
99 | /* Debug feature */ | |
100 | static debug_info_t *sfdbg; | |
101 | ||
69f239ed HB |
102 | /* |
103 | * sf_disable() - Switch off sampling facility | |
104 | */ | |
105 | static int sf_disable(void) | |
106 | { | |
107 | struct hws_lsctl_request_block sreq; | |
108 | ||
109 | memset(&sreq, 0, sizeof(sreq)); | |
110 | return lsctl(&sreq); | |
111 | } | |
112 | ||
8c069ff4 HB |
113 | /* |
114 | * sf_buffer_available() - Check for an allocated sampling buffer | |
115 | */ | |
116 | static int sf_buffer_available(struct cpu_hw_sf *cpuhw) | |
117 | { | |
69f239ed | 118 | return !!cpuhw->sfb.sdbt; |
8c069ff4 HB |
119 | } |
120 | ||
121 | /* | |
122 | * deallocate sampling facility buffer | |
123 | */ | |
124 | static void free_sampling_buffer(struct sf_buffer *sfb) | |
125 | { | |
69f239ed | 126 | unsigned long *sdbt, *curr; |
8c069ff4 HB |
127 | |
128 | if (!sfb->sdbt) | |
129 | return; | |
130 | ||
131 | sdbt = sfb->sdbt; | |
69f239ed | 132 | curr = sdbt; |
8c069ff4 | 133 | |
69f239ed | 134 | /* Free the SDBT after all SDBs are processed... */ |
8c069ff4 HB |
135 | while (1) { |
136 | if (!*curr || !sdbt) | |
137 | break; | |
138 | ||
69f239ed | 139 | /* Process table-link entries */ |
8c069ff4 HB |
140 | if (is_link_entry(curr)) { |
141 | curr = get_next_sdbt(curr); | |
142 | if (sdbt) | |
69f239ed | 143 | free_page((unsigned long) sdbt); |
8c069ff4 | 144 | |
69f239ed HB |
145 | /* If the origin is reached, sampling buffer is freed */ |
146 | if (curr == sfb->sdbt) | |
8c069ff4 HB |
147 | break; |
148 | else | |
69f239ed | 149 | sdbt = curr; |
8c069ff4 | 150 | } else { |
69f239ed | 151 | /* Process SDB pointer */ |
8c069ff4 HB |
152 | if (*curr) { |
153 | free_page(*curr); | |
154 | curr++; | |
155 | } | |
156 | } | |
157 | } | |
158 | ||
159 | debug_sprintf_event(sfdbg, 5, | |
69f239ed | 160 | "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); |
8c069ff4 HB |
161 | memset(sfb, 0, sizeof(*sfb)); |
162 | } | |
163 | ||
69f239ed HB |
164 | static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) |
165 | { | |
166 | unsigned long sdb, *trailer; | |
167 | ||
168 | /* Allocate and initialize sample-data-block */ | |
169 | sdb = get_zeroed_page(gfp_flags); | |
170 | if (!sdb) | |
171 | return -ENOMEM; | |
172 | trailer = trailer_entry_ptr(sdb); | |
173 | *trailer = SDB_TE_ALERT_REQ_MASK; | |
174 | ||
175 | /* Link SDB into the sample-data-block-table */ | |
176 | *sdbt = sdb; | |
177 | ||
178 | return 0; | |
179 | } | |
180 | ||
181 | /* | |
182 | * realloc_sampling_buffer() - extend sampler memory | |
183 | * | |
184 | * Allocates new sample-data-blocks and adds them to the specified sampling | |
185 | * buffer memory. | |
186 | * | |
187 | * Important: This modifies the sampling buffer and must be called when the | |
188 | * sampling facility is disabled. | |
189 | * | |
190 | * Returns zero on success, non-zero otherwise. | |
191 | */ | |
192 | static int realloc_sampling_buffer(struct sf_buffer *sfb, | |
193 | unsigned long num_sdb, gfp_t gfp_flags) | |
194 | { | |
195 | int i, rc; | |
196 | unsigned long *new, *tail; | |
197 | ||
198 | if (!sfb->sdbt || !sfb->tail) | |
199 | return -EINVAL; | |
200 | ||
201 | if (!is_link_entry(sfb->tail)) | |
202 | return -EINVAL; | |
203 | ||
204 | /* Append to the existing sampling buffer, overwriting the table-link | |
205 | * register. | |
206 | * The tail variables always points to the "tail" (last and table-link) | |
207 | * entry in an SDB-table. | |
208 | */ | |
209 | tail = sfb->tail; | |
210 | ||
211 | /* Do a sanity check whether the table-link entry points to | |
212 | * the sampling buffer origin. | |
213 | */ | |
214 | if (sfb->sdbt != get_next_sdbt(tail)) { | |
215 | debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " | |
216 | "sampling buffer is not linked: origin=%p" | |
217 | "tail=%p\n", | |
218 | (void *) sfb->sdbt, (void *) tail); | |
219 | return -EINVAL; | |
220 | } | |
221 | ||
222 | /* Allocate remaining SDBs */ | |
223 | rc = 0; | |
224 | for (i = 0; i < num_sdb; i++) { | |
225 | /* Allocate a new SDB-table if it is full. */ | |
226 | if (require_table_link(tail)) { | |
227 | new = (unsigned long *) get_zeroed_page(gfp_flags); | |
228 | if (!new) { | |
229 | rc = -ENOMEM; | |
230 | break; | |
231 | } | |
232 | sfb->num_sdbt++; | |
233 | /* Link current page to tail of chain */ | |
234 | *tail = (unsigned long)(void *) new + 1; | |
235 | tail = new; | |
236 | } | |
237 | ||
238 | /* Allocate a new sample-data-block. | |
239 | * If there is not enough memory, stop the realloc process | |
240 | * and simply use what was allocated. If this is a temporary | |
241 | * issue, a new realloc call (if required) might succeed. | |
242 | */ | |
243 | rc = alloc_sample_data_block(tail, gfp_flags); | |
244 | if (rc) | |
245 | break; | |
246 | sfb->num_sdb++; | |
247 | tail++; | |
248 | } | |
249 | ||
250 | /* Link sampling buffer to its origin */ | |
251 | *tail = (unsigned long) sfb->sdbt + 1; | |
252 | sfb->tail = tail; | |
253 | ||
254 | debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" | |
255 | " settings: sdbt=%lu sdb=%lu\n", | |
256 | sfb->num_sdbt, sfb->num_sdb); | |
257 | return rc; | |
258 | } | |
259 | ||
8c069ff4 HB |
260 | /* |
261 | * allocate_sampling_buffer() - allocate sampler memory | |
262 | * | |
263 | * Allocates and initializes a sampling buffer structure using the | |
264 | * specified number of sample-data-blocks (SDB). For each allocation, | |
265 | * a 4K page is used. The number of sample-data-block-tables (SDBT) | |
266 | * are calculated from SDBs. | |
267 | * Also set the ALERT_REQ mask in each SDBs trailer. | |
268 | * | |
269 | * Returns zero on success, non-zero otherwise. | |
270 | */ | |
271 | static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) | |
272 | { | |
69f239ed | 273 | int rc; |
8c069ff4 HB |
274 | |
275 | if (sfb->sdbt) | |
276 | return -EINVAL; | |
69f239ed HB |
277 | |
278 | /* Allocate the sample-data-block-table origin */ | |
279 | sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); | |
280 | if (!sfb->sdbt) | |
281 | return -ENOMEM; | |
8c069ff4 | 282 | sfb->num_sdb = 0; |
69f239ed | 283 | sfb->num_sdbt = 1; |
8c069ff4 | 284 | |
69f239ed HB |
285 | /* Link the table origin to point to itself to prepare for |
286 | * realloc_sampling_buffer() invocation. | |
287 | */ | |
288 | sfb->tail = sfb->sdbt; | |
289 | *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1; | |
8c069ff4 | 290 | |
69f239ed HB |
291 | /* Allocate requested number of sample-data-blocks */ |
292 | rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); | |
293 | if (rc) { | |
294 | free_sampling_buffer(sfb); | |
295 | debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " | |
296 | "realloc_sampling_buffer failed with rc=%i\n", rc); | |
297 | } else | |
298 | debug_sprintf_event(sfdbg, 4, | |
299 | "alloc_sampling_buffer: tear=%p dear=%p\n", | |
300 | sfb->sdbt, (void *) *sfb->sdbt); | |
301 | return rc; | |
302 | } | |
8c069ff4 | 303 | |
69f239ed HB |
304 | static void sfb_set_limits(unsigned long min, unsigned long max) |
305 | { | |
7e75fc3f HB |
306 | struct hws_qsi_info_block si; |
307 | ||
69f239ed HB |
308 | CPUM_SF_MIN_SDB = min; |
309 | CPUM_SF_MAX_SDB = max; | |
7e75fc3f HB |
310 | |
311 | memset(&si, 0, sizeof(si)); | |
312 | if (!qsi(&si)) | |
313 | CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); | |
314 | } | |
315 | ||
316 | static unsigned long sfb_max_limit(struct hw_perf_event *hwc) | |
317 | { | |
318 | return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR | |
319 | : CPUM_SF_MAX_SDB; | |
69f239ed | 320 | } |
8c069ff4 | 321 | |
69f239ed HB |
322 | static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, |
323 | struct hw_perf_event *hwc) | |
324 | { | |
325 | if (!sfb->sdbt) | |
326 | return SFB_ALLOC_REG(hwc); | |
327 | if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) | |
328 | return SFB_ALLOC_REG(hwc) - sfb->num_sdb; | |
329 | return 0; | |
330 | } | |
8c069ff4 | 331 | |
69f239ed HB |
/* Return 1 if SDB allocations are outstanding for the event, else 0. */
static int sfb_has_pending_allocs(struct sf_buffer *sfb,
				  struct hw_perf_event *hwc)
{
	return sfb_pending_allocs(sfb, hwc) != 0;
}
8c069ff4 | 337 | |
69f239ed HB |
338 | static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) |
339 | { | |
7e75fc3f HB |
340 | /* Limit the number of SDBs to not exceed the maximum */ |
341 | num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); | |
69f239ed HB |
342 | if (num) |
343 | SFB_ALLOC_REG(hwc) += num; | |
8c069ff4 HB |
344 | } |
345 | ||
69f239ed HB |
346 | static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) |
347 | { | |
348 | SFB_ALLOC_REG(hwc) = 0; | |
349 | sfb_account_allocs(num, hwc); | |
350 | } | |
351 | ||
7e75fc3f HB |
352 | static void deallocate_buffers(struct cpu_hw_sf *cpuhw) |
353 | { | |
354 | if (cpuhw->sfb.sdbt) | |
355 | free_sampling_buffer(&cpuhw->sfb); | |
356 | } | |
357 | ||
358 | static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) | |
8c069ff4 | 359 | { |
7e75fc3f | 360 | unsigned long n_sdb, freq, factor; |
3d43b981 | 361 | size_t sample_size; |
8c069ff4 HB |
362 | |
363 | /* Calculate sampling buffers using 4K pages | |
364 | * | |
7e75fc3f HB |
365 | * 1. Determine the sample data size which depends on the used |
366 | * sampling functions, for example, basic-sampling or | |
367 | * basic-sampling with diagnostic-sampling. | |
368 | * | |
369 | * 2. Use the sampling frequency as input. The sampling buffer is | |
370 | * designed for almost one second. This can be adjusted through | |
371 | * the "factor" variable. | |
8c069ff4 | 372 | * In any case, alloc_sampling_buffer() sets the Alert Request |
7e75fc3f | 373 | * Control indicator to trigger a measurement-alert to harvest |
8c069ff4 HB |
374 | * sample-data-blocks (sdb). |
375 | * | |
7e75fc3f | 376 | * 3. Compute the number of sample-data-blocks and ensure a minimum |
8c069ff4 | 377 | * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not |
7e75fc3f HB |
378 | * exceed a "calculated" maximum. The symbolic maximum is |
379 | * designed for basic-sampling only and needs to be increased if | |
380 | * diagnostic-sampling is active. | |
381 | * See also the remarks for these symbolic constants. | |
8c069ff4 | 382 | * |
7e75fc3f HB |
383 | * 4. Compute the number of sample-data-block-tables (SDBT) and |
384 | * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up | |
385 | * to 511 SDBs). | |
8c069ff4 | 386 | */ |
3d43b981 | 387 | sample_size = sizeof(struct hws_basic_entry); |
8c069ff4 HB |
388 | freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); |
389 | factor = 1; | |
7e75fc3f | 390 | n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); |
8c069ff4 HB |
391 | if (n_sdb < CPUM_SF_MIN_SDB) |
392 | n_sdb = CPUM_SF_MIN_SDB; | |
393 | ||
69f239ed HB |
394 | /* If there is already a sampling buffer allocated, it is very likely |
395 | * that the sampling facility is enabled too. If the event to be | |
396 | * initialized requires a greater sampling buffer, the allocation must | |
397 | * be postponed. Changing the sampling buffer requires the sampling | |
398 | * facility to be in the disabled state. So, account the number of | |
399 | * required SDBs and let cpumsf_pmu_enable() resize the buffer just | |
400 | * before the event is started. | |
8c069ff4 | 401 | */ |
69f239ed | 402 | sfb_init_allocs(n_sdb, hwc); |
8c069ff4 HB |
403 | if (sf_buffer_available(cpuhw)) |
404 | return 0; | |
405 | ||
406 | debug_sprintf_event(sfdbg, 3, | |
7e75fc3f HB |
407 | "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" |
408 | " sample_size=%lu cpuhw=%p\n", | |
409 | SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), | |
410 | sample_size, cpuhw); | |
8c069ff4 HB |
411 | |
412 | return alloc_sampling_buffer(&cpuhw->sfb, | |
69f239ed | 413 | sfb_pending_allocs(&cpuhw->sfb, hwc)); |
8c069ff4 HB |
414 | } |
415 | ||
69f239ed HB |
/*
 * Return the smaller of @min and @percent percent of @base, where the
 * percentage is rounded up to the next integer.
 */
static unsigned long min_percent(unsigned int percent, unsigned long base,
				 unsigned long min)
{
	/* Round-up division by 100 */
	unsigned long share = (percent * base + 100 - 1) / 100;

	return min < share ? min : share;
}
8c069ff4 | 421 | |
69f239ed HB |
/*
 * compute_sfb_extent() - number of SDBs by which to grow the buffer
 * @ratio:	lost samples in percent of the samples per second
 * @base:	current number of sample-data-blocks
 *
 * Percentage-based growth: up to 5% sample data loss is tolerated and
 * yields no extension.  Above that, the percentage of @base that is
 * requested rises with @ratio; each step is passed through min_percent()
 * together with a fixed value.
 */
static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
{
	if (ratio <= 5)
		return 0;
	/* The 5..25 and 25..50 ranges use the same setting */
	if (ratio <= 50)
		return min_percent(1, base, 1);
	if (ratio <= 75)
		return min_percent(2, base, 2);
	if (ratio <= 100)
		return min_percent(3, base, 3);
	if (ratio <= 250)
		return min_percent(4, base, 4);

	return min_percent(5, base, 8);
}
8c069ff4 | 444 | |
69f239ed HB |
445 | static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, |
446 | struct hw_perf_event *hwc) | |
447 | { | |
448 | unsigned long ratio, num; | |
449 | ||
450 | if (!OVERFLOW_REG(hwc)) | |
451 | return; | |
452 | ||
453 | /* The sample_overflow contains the average number of sample data | |
454 | * that has been lost because sample-data-blocks were full. | |
455 | * | |
456 | * Calculate the total number of sample data entries that has been | |
457 | * discarded. Then calculate the ratio of lost samples to total samples | |
458 | * per second in percent. | |
459 | */ | |
460 | ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, | |
461 | sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); | |
462 | ||
463 | /* Compute number of sample-data-blocks */ | |
464 | num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); | |
465 | if (num) | |
466 | sfb_account_allocs(num, hwc); | |
467 | ||
468 | debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" | |
469 | " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); | |
470 | OVERFLOW_REG(hwc) = 0; | |
471 | } | |
472 | ||
473 | /* extend_sampling_buffer() - Extend sampling buffer | |
474 | * @sfb: Sampling buffer structure (for local CPU) | |
475 | * @hwc: Perf event hardware structure | |
476 | * | |
477 | * Use this function to extend the sampling buffer based on the overflow counter | |
478 | * and postponed allocation extents stored in the specified Perf event hardware. | |
479 | * | |
480 | * Important: This function disables the sampling facility in order to safely | |
481 | * change the sampling buffer structure. Do not call this function | |
482 | * when the PMU is active. | |
8c069ff4 | 483 | */ |
69f239ed HB |
484 | static void extend_sampling_buffer(struct sf_buffer *sfb, |
485 | struct hw_perf_event *hwc) | |
8c069ff4 | 486 | { |
69f239ed HB |
487 | unsigned long num, num_old; |
488 | int rc; | |
8c069ff4 | 489 | |
69f239ed HB |
490 | num = sfb_pending_allocs(sfb, hwc); |
491 | if (!num) | |
492 | return; | |
493 | num_old = sfb->num_sdb; | |
494 | ||
495 | /* Disable the sampling facility to reset any states and also | |
496 | * clear pending measurement alerts. | |
497 | */ | |
498 | sf_disable(); | |
499 | ||
500 | /* Extend the sampling buffer. | |
501 | * This memory allocation typically happens in an atomic context when | |
502 | * called by perf. Because this is a reallocation, it is fine if the | |
503 | * new SDB-request cannot be satisfied immediately. | |
504 | */ | |
505 | rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); | |
506 | if (rc) | |
507 | debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " | |
508 | "failed with rc=%i\n", rc); | |
509 | ||
510 | if (sfb_has_pending_allocs(sfb, hwc)) | |
511 | debug_sprintf_event(sfdbg, 5, "sfb: extend: " | |
512 | "req=%lu alloc=%lu remaining=%lu\n", | |
513 | num, sfb->num_sdb - num_old, | |
514 | sfb_pending_allocs(sfb, hwc)); | |
8c069ff4 HB |
515 | } |
516 | ||
517 | ||
69f239ed HB |
518 | /* Number of perf events counting hardware events */ |
519 | static atomic_t num_events; | |
520 | /* Used to avoid races in calling reserve/release_cpumf_hardware */ | |
521 | static DEFINE_MUTEX(pmc_reserve_mutex); | |
522 | ||
8c069ff4 HB |
523 | #define PMC_INIT 0 |
524 | #define PMC_RELEASE 1 | |
e28bb79d | 525 | #define PMC_FAILURE 2 |
8c069ff4 HB |
526 | static void setup_pmc_cpu(void *flags) |
527 | { | |
528 | int err; | |
eb7e7d76 | 529 | struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 | 530 | |
8c069ff4 HB |
531 | err = 0; |
532 | switch (*((int *) flags)) { | |
533 | case PMC_INIT: | |
534 | memset(cpusf, 0, sizeof(*cpusf)); | |
535 | err = qsi(&cpusf->qsi); | |
536 | if (err) | |
537 | break; | |
538 | cpusf->flags |= PMU_F_RESERVED; | |
539 | err = sf_disable(); | |
540 | if (err) | |
541 | pr_err("Switching off the sampling facility failed " | |
542 | "with rc=%i\n", err); | |
543 | debug_sprintf_event(sfdbg, 5, | |
544 | "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); | |
545 | break; | |
546 | case PMC_RELEASE: | |
547 | cpusf->flags &= ~PMU_F_RESERVED; | |
548 | err = sf_disable(); | |
549 | if (err) { | |
550 | pr_err("Switching off the sampling facility failed " | |
551 | "with rc=%i\n", err); | |
7e75fc3f HB |
552 | } else |
553 | deallocate_buffers(cpusf); | |
8c069ff4 HB |
554 | debug_sprintf_event(sfdbg, 5, |
555 | "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); | |
556 | break; | |
557 | } | |
e28bb79d HB |
558 | if (err) |
559 | *((int *) flags) |= PMC_FAILURE; | |
8c069ff4 HB |
560 | } |
561 | ||
562 | static void release_pmc_hardware(void) | |
563 | { | |
564 | int flags = PMC_RELEASE; | |
565 | ||
566 | irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); | |
567 | on_each_cpu(setup_pmc_cpu, &flags, 1); | |
568 | } | |
569 | ||
570 | static int reserve_pmc_hardware(void) | |
571 | { | |
572 | int flags = PMC_INIT; | |
573 | ||
574 | on_each_cpu(setup_pmc_cpu, &flags, 1); | |
e28bb79d HB |
575 | if (flags & PMC_FAILURE) { |
576 | release_pmc_hardware(); | |
577 | return -ENODEV; | |
578 | } | |
8c069ff4 HB |
579 | irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); |
580 | ||
581 | return 0; | |
582 | } | |
583 | ||
584 | static void hw_perf_event_destroy(struct perf_event *event) | |
585 | { | |
586 | /* Release PMC if this is the last perf event */ | |
587 | if (!atomic_add_unless(&num_events, -1, 1)) { | |
588 | mutex_lock(&pmc_reserve_mutex); | |
589 | if (atomic_dec_return(&num_events) == 0) | |
590 | release_pmc_hardware(); | |
591 | mutex_unlock(&pmc_reserve_mutex); | |
592 | } | |
593 | } | |
594 | ||
595 | static void hw_init_period(struct hw_perf_event *hwc, u64 period) | |
596 | { | |
597 | hwc->sample_period = period; | |
598 | hwc->last_period = hwc->sample_period; | |
599 | local64_set(&hwc->period_left, hwc->sample_period); | |
600 | } | |
601 | ||
602 | static void hw_reset_registers(struct hw_perf_event *hwc, | |
69f239ed | 603 | unsigned long *sdbt_origin) |
8c069ff4 | 604 | { |
69f239ed HB |
605 | /* (Re)set to first sample-data-block-table */ |
606 | TEAR_REG(hwc) = (unsigned long) sdbt_origin; | |
8c069ff4 HB |
607 | } |
608 | ||
609 | static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, | |
610 | unsigned long rate) | |
611 | { | |
69f239ed HB |
612 | return clamp_t(unsigned long, rate, |
613 | si->min_sampl_rate, si->max_sampl_rate); | |
8c069ff4 HB |
614 | } |
615 | ||
544e8dd7 HB |
616 | static u32 cpumsf_pid_type(struct perf_event *event, |
617 | u32 pid, enum pid_type type) | |
618 | { | |
619 | struct task_struct *tsk; | |
620 | ||
621 | /* Idle process */ | |
622 | if (!pid) | |
623 | goto out; | |
624 | ||
625 | tsk = find_task_by_pid_ns(pid, &init_pid_ns); | |
626 | pid = -1; | |
627 | if (tsk) { | |
628 | /* | |
629 | * Only top level events contain the pid namespace in which | |
630 | * they are created. | |
631 | */ | |
632 | if (event->parent) | |
633 | event = event->parent; | |
634 | pid = __task_pid_nr_ns(tsk, type, event->ns); | |
635 | /* | |
636 | * See also 1d953111b648 | |
637 | * "perf/core: Don't report zero PIDs for exiting tasks". | |
638 | */ | |
639 | if (!pid && !pid_alive(tsk)) | |
640 | pid = -1; | |
641 | } | |
642 | out: | |
643 | return pid; | |
644 | } | |
645 | ||
646 | static void cpumsf_output_event_pid(struct perf_event *event, | |
647 | struct perf_sample_data *data, | |
648 | struct pt_regs *regs) | |
649 | { | |
650 | u32 pid; | |
651 | struct perf_event_header header; | |
652 | struct perf_output_handle handle; | |
653 | ||
654 | /* | |
655 | * Obtain the PID from the basic-sampling data entry and | |
656 | * correct the data->tid_entry.pid value. | |
657 | */ | |
658 | pid = data->tid_entry.pid; | |
659 | ||
660 | /* Protect callchain buffers, tasks */ | |
661 | rcu_read_lock(); | |
662 | ||
663 | perf_prepare_sample(&header, data, event, regs); | |
664 | if (perf_output_begin(&handle, event, header.size)) | |
665 | goto out; | |
666 | ||
667 | /* Update the process ID (see also kernel/events/core.c) */ | |
668 | data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID); | |
669 | data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID); | |
670 | ||
671 | perf_output_sample(&handle, &header, data, event); | |
672 | perf_output_end(&handle); | |
673 | out: | |
674 | rcu_read_unlock(); | |
675 | } | |
676 | ||
8c069ff4 HB |
677 | static int __hw_perf_event_init(struct perf_event *event) |
678 | { | |
679 | struct cpu_hw_sf *cpuhw; | |
680 | struct hws_qsi_info_block si; | |
681 | struct perf_event_attr *attr = &event->attr; | |
682 | struct hw_perf_event *hwc = &event->hw; | |
683 | unsigned long rate; | |
684 | int cpu, err; | |
685 | ||
686 | /* Reserve CPU-measurement sampling facility */ | |
687 | err = 0; | |
688 | if (!atomic_inc_not_zero(&num_events)) { | |
689 | mutex_lock(&pmc_reserve_mutex); | |
690 | if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) | |
691 | err = -EBUSY; | |
692 | else | |
693 | atomic_inc(&num_events); | |
694 | mutex_unlock(&pmc_reserve_mutex); | |
695 | } | |
696 | event->destroy = hw_perf_event_destroy; | |
697 | ||
698 | if (err) | |
699 | goto out; | |
700 | ||
701 | /* Access per-CPU sampling information (query sampling info) */ | |
702 | /* | |
703 | * The event->cpu value can be -1 to count on every CPU, for example, | |
704 | * when attaching to a task. If this is specified, use the query | |
705 | * sampling info from the current CPU, otherwise use event->cpu to | |
706 | * retrieve the per-CPU information. | |
707 | * Later, cpuhw indicates whether to allocate sampling buffers for a | |
708 | * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). | |
709 | */ | |
710 | memset(&si, 0, sizeof(si)); | |
711 | cpuhw = NULL; | |
712 | if (event->cpu == -1) | |
713 | qsi(&si); | |
714 | else { | |
715 | /* Event is pinned to a particular CPU, retrieve the per-CPU | |
716 | * sampling structure for accessing the CPU-specific QSI. | |
717 | */ | |
718 | cpuhw = &per_cpu(cpu_hw_sf, event->cpu); | |
719 | si = cpuhw->qsi; | |
720 | } | |
721 | ||
722 | /* Check sampling facility authorization and, if not authorized, | |
723 | * fall back to other PMUs. It is safe to check any CPU because | |
724 | * the authorization is identical for all configured CPUs. | |
725 | */ | |
726 | if (!si.as) { | |
727 | err = -ENOENT; | |
728 | goto out; | |
729 | } | |
730 | ||
7e75fc3f HB |
731 | /* Always enable basic sampling */ |
732 | SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; | |
733 | ||
734 | /* Check if diagnostic sampling is requested. Deny if the required | |
735 | * sampling authorization is missing. | |
736 | */ | |
737 | if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { | |
738 | if (!si.ad) { | |
739 | err = -EPERM; | |
740 | goto out; | |
741 | } | |
742 | SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; | |
743 | } | |
744 | ||
d7528862 HB |
745 | /* Check and set other sampling flags */ |
746 | if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) | |
747 | SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; | |
748 | ||
8c069ff4 HB |
749 | /* The sampling information (si) contains information about the |
750 | * min/max sampling intervals and the CPU speed. So calculate the | |
751 | * correct sampling interval and avoid the whole period adjust | |
752 | * feedback loop. | |
753 | */ | |
754 | rate = 0; | |
755 | if (attr->freq) { | |
4bbaf258 HB |
756 | if (!attr->sample_freq) { |
757 | err = -EINVAL; | |
758 | goto out; | |
759 | } | |
8c069ff4 HB |
760 | rate = freq_to_sample_rate(&si, attr->sample_freq); |
761 | rate = hw_limit_rate(&si, rate); | |
762 | attr->freq = 0; | |
763 | attr->sample_period = rate; | |
764 | } else { | |
765 | /* The min/max sampling rates specifies the valid range | |
766 | * of sample periods. If the specified sample period is | |
767 | * out of range, limit the period to the range boundary. | |
768 | */ | |
769 | rate = hw_limit_rate(&si, hwc->sample_period); | |
770 | ||
771 | /* The perf core maintains a maximum sample rate that is | |
772 | * configurable through the sysctl interface. Ensure the | |
773 | * sampling rate does not exceed this value. This also helps | |
774 | * to avoid throttling when pushing samples with | |
775 | * perf_event_overflow(). | |
776 | */ | |
777 | if (sample_rate_to_freq(&si, rate) > | |
778 | sysctl_perf_event_sample_rate) { | |
779 | err = -EINVAL; | |
780 | debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); | |
781 | goto out; | |
782 | } | |
783 | } | |
784 | SAMPL_RATE(hwc) = rate; | |
785 | hw_init_period(hwc, SAMPL_RATE(hwc)); | |
786 | ||
69f239ed HB |
787 | /* Initialize sample data overflow accounting */ |
788 | hwc->extra_reg.reg = REG_OVERFLOW; | |
789 | OVERFLOW_REG(hwc) = 0; | |
790 | ||
cbf6948f PH |
791 | /* Use AUX buffer. No need to allocate it by ourself */ |
792 | if (attr->config == PERF_EVENT_CPUM_SF_DIAG) | |
793 | return 0; | |
794 | ||
8c069ff4 HB |
795 | /* Allocate the per-CPU sampling buffer using the CPU information |
796 | * from the event. If the event is not pinned to a particular | |
797 | * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling | |
798 | * buffers for each online CPU. | |
799 | */ | |
800 | if (cpuhw) | |
801 | /* Event is pinned to a particular CPU */ | |
7e75fc3f | 802 | err = allocate_buffers(cpuhw, hwc); |
8c069ff4 HB |
803 | else { |
804 | /* Event is not pinned, allocate sampling buffer on | |
805 | * each online CPU | |
806 | */ | |
807 | for_each_online_cpu(cpu) { | |
808 | cpuhw = &per_cpu(cpu_hw_sf, cpu); | |
7e75fc3f | 809 | err = allocate_buffers(cpuhw, hwc); |
8c069ff4 HB |
810 | if (err) |
811 | break; | |
812 | } | |
813 | } | |
544e8dd7 HB |
814 | |
815 | /* If PID/TID sampling is active, replace the default overflow | |
816 | * handler to extract and resolve the PIDs from the basic-sampling | |
817 | * data entries. | |
818 | */ | |
819 | if (event->attr.sample_type & PERF_SAMPLE_TID) | |
820 | if (is_default_overflow_handler(event)) | |
821 | event->overflow_handler = cpumsf_output_event_pid; | |
8c069ff4 HB |
822 | out: |
823 | return err; | |
824 | } | |
825 | ||
826 | static int cpumsf_pmu_event_init(struct perf_event *event) | |
827 | { | |
828 | int err; | |
829 | ||
55baa2f8 HB |
830 | /* No support for taken branch sampling */ |
831 | if (has_branch_stack(event)) | |
832 | return -EOPNOTSUPP; | |
833 | ||
834 | switch (event->attr.type) { | |
835 | case PERF_TYPE_RAW: | |
7e75fc3f HB |
836 | if ((event->attr.config != PERF_EVENT_CPUM_SF) && |
837 | (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) | |
55baa2f8 HB |
838 | return -ENOENT; |
839 | break; | |
840 | case PERF_TYPE_HARDWARE: | |
841 | /* Support sampling of CPU cycles in addition to the | |
842 | * counter facility. However, the counter facility | |
843 | * is more precise and, hence, restrict this PMU to | |
844 | * sampling events only. | |
845 | */ | |
846 | if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) | |
847 | return -ENOENT; | |
848 | if (!is_sampling_event(event)) | |
849 | return -ENOENT; | |
850 | break; | |
851 | default: | |
8c069ff4 | 852 | return -ENOENT; |
55baa2f8 | 853 | } |
8c069ff4 | 854 | |
dd127b3b | 855 | /* Check online status of the CPU to which the event is pinned */ |
19220999 | 856 | if (event->cpu >= 0 && !cpu_online(event->cpu)) |
fc3100d6 | 857 | return -ENODEV; |
8c069ff4 | 858 | |
dd127b3b HB |
859 | /* Force reset of idle/hv excludes regardless of what the |
860 | * user requested. | |
861 | */ | |
862 | if (event->attr.exclude_hv) | |
863 | event->attr.exclude_hv = 0; | |
864 | if (event->attr.exclude_idle) | |
865 | event->attr.exclude_idle = 0; | |
866 | ||
8c069ff4 HB |
867 | err = __hw_perf_event_init(event); |
868 | if (unlikely(err)) | |
869 | if (event->destroy) | |
870 | event->destroy(event); | |
871 | return err; | |
872 | } | |
873 | ||
874 | static void cpumsf_pmu_enable(struct pmu *pmu) | |
875 | { | |
eb7e7d76 | 876 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
69f239ed | 877 | struct hw_perf_event *hwc; |
8c069ff4 HB |
878 | int err; |
879 | ||
880 | if (cpuhw->flags & PMU_F_ENABLED) | |
881 | return; | |
882 | ||
883 | if (cpuhw->flags & PMU_F_ERR_MASK) | |
884 | return; | |
885 | ||
69f239ed HB |
886 | /* Check whether to extent the sampling buffer. |
887 | * | |
888 | * Two conditions trigger an increase of the sampling buffer for a | |
889 | * perf event: | |
890 | * 1. Postponed buffer allocations from the event initialization. | |
891 | * 2. Sampling overflows that contribute to pending allocations. | |
892 | * | |
893 | * Note that the extend_sampling_buffer() function disables the sampling | |
894 | * facility, but it can be fully re-enabled using sampling controls that | |
895 | * have been saved in cpumsf_pmu_disable(). | |
896 | */ | |
897 | if (cpuhw->event) { | |
898 | hwc = &cpuhw->event->hw; | |
cbf6948f PH |
899 | if (!(SAMPL_DIAG_MODE(hwc))) { |
900 | /* | |
901 | * Account number of overflow-designated | |
902 | * buffer extents | |
903 | */ | |
904 | sfb_account_overflows(cpuhw, hwc); | |
905 | if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) | |
906 | extend_sampling_buffer(&cpuhw->sfb, hwc); | |
907 | } | |
69f239ed HB |
908 | } |
909 | ||
910 | /* (Re)enable the PMU and sampling facility */ | |
8c069ff4 HB |
911 | cpuhw->flags |= PMU_F_ENABLED; |
912 | barrier(); | |
913 | ||
914 | err = lsctl(&cpuhw->lsctl); | |
915 | if (err) { | |
916 | cpuhw->flags &= ~PMU_F_ENABLED; | |
917 | pr_err("Loading sampling controls failed: op=%i err=%i\n", | |
918 | 1, err); | |
919 | return; | |
920 | } | |
921 | ||
d4c7e649 HB |
922 | /* Load current program parameter */ |
923 | lpp(&S390_lowcore.lpp); | |
924 | ||
7e75fc3f HB |
925 | debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " |
926 | "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, | |
927 | cpuhw->lsctl.ed, cpuhw->lsctl.cd, | |
8c069ff4 HB |
928 | (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); |
929 | } | |
930 | ||
931 | static void cpumsf_pmu_disable(struct pmu *pmu) | |
932 | { | |
eb7e7d76 | 933 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
934 | struct hws_lsctl_request_block inactive; |
935 | struct hws_qsi_info_block si; | |
936 | int err; | |
937 | ||
938 | if (!(cpuhw->flags & PMU_F_ENABLED)) | |
939 | return; | |
940 | ||
941 | if (cpuhw->flags & PMU_F_ERR_MASK) | |
942 | return; | |
943 | ||
944 | /* Switch off sampling activation control */ | |
945 | inactive = cpuhw->lsctl; | |
946 | inactive.cs = 0; | |
7e75fc3f | 947 | inactive.cd = 0; |
8c069ff4 HB |
948 | |
949 | err = lsctl(&inactive); | |
950 | if (err) { | |
951 | pr_err("Loading sampling controls failed: op=%i err=%i\n", | |
952 | 2, err); | |
953 | return; | |
954 | } | |
955 | ||
956 | /* Save state of TEAR and DEAR register contents */ | |
957 | if (!qsi(&si)) { | |
958 | /* TEAR/DEAR values are valid only if the sampling facility is | |
959 | * enabled. Note that cpumsf_pmu_disable() might be called even | |
960 | * for a disabled sampling facility because cpumsf_pmu_enable() | |
961 | * controls the enable/disable state. | |
962 | */ | |
963 | if (si.es) { | |
964 | cpuhw->lsctl.tear = si.tear; | |
965 | cpuhw->lsctl.dear = si.dear; | |
966 | } | |
967 | } else | |
968 | debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " | |
969 | "qsi() failed with err=%i\n", err); | |
970 | ||
971 | cpuhw->flags &= ~PMU_F_ENABLED; | |
972 | } | |
973 | ||
dd127b3b HB |
974 | /* perf_exclude_event() - Filter event |
975 | * @event: The perf event | |
976 | * @regs: pt_regs structure | |
977 | * @sde_regs: Sample-data-entry (sde) regs structure | |
978 | * | |
979 | * Filter perf events according to their exclude specification. | |
980 | * | |
981 | * Return non-zero if the event shall be excluded. | |
982 | */ | |
983 | static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, | |
984 | struct perf_sf_sde_regs *sde_regs) | |
985 | { | |
986 | if (event->attr.exclude_user && user_mode(regs)) | |
987 | return 1; | |
988 | if (event->attr.exclude_kernel && !user_mode(regs)) | |
989 | return 1; | |
990 | if (event->attr.exclude_guest && sde_regs->in_guest) | |
991 | return 1; | |
992 | if (event->attr.exclude_host && !sde_regs->in_guest) | |
993 | return 1; | |
994 | return 0; | |
995 | } | |
996 | ||
8c069ff4 HB |
997 | /* perf_push_sample() - Push samples to perf |
998 | * @event: The perf event | |
999 | * @sample: Hardware sample data | |
1000 | * | |
1001 | * Use the hardware sample data to create perf event sample. The sample | |
1002 | * is the pushed to the event subsystem and the function checks for | |
1003 | * possible event overflows. If an event overflow occurs, the PMU is | |
1004 | * stopped. | |
1005 | * | |
1006 | * Return non-zero if an event overflow occurred. | |
1007 | */ | |
3d43b981 PH |
1008 | static int perf_push_sample(struct perf_event *event, |
1009 | struct hws_basic_entry *basic) | |
8c069ff4 HB |
1010 | { |
1011 | int overflow; | |
1012 | struct pt_regs regs; | |
443e802b | 1013 | struct perf_sf_sde_regs *sde_regs; |
8c069ff4 HB |
1014 | struct perf_sample_data data; |
1015 | ||
7e75fc3f | 1016 | /* Setup perf sample */ |
8c069ff4 HB |
1017 | perf_sample_data_init(&data, 0, event->hw.last_period); |
1018 | ||
443e802b HB |
1019 | /* Setup pt_regs to look like an CPU-measurement external interrupt |
1020 | * using the Program Request Alert code. The regs.int_parm_long | |
1021 | * field which is unused contains additional sample-data-entry related | |
1022 | * indicators. | |
1023 | */ | |
8c069ff4 | 1024 | memset(®s, 0, sizeof(regs)); |
443e802b HB |
1025 | regs.int_code = 0x1407; |
1026 | regs.int_parm = CPU_MF_INT_SF_PRA; | |
1027 | sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; | |
1028 | ||
3d43b981 PH |
1029 | psw_bits(regs.psw).ia = basic->ia; |
1030 | psw_bits(regs.psw).dat = basic->T; | |
1031 | psw_bits(regs.psw).wait = basic->W; | |
1032 | psw_bits(regs.psw).pstate = basic->P; | |
1033 | psw_bits(regs.psw).as = basic->AS; | |
8c069ff4 | 1034 | |
e22cf8ca | 1035 | /* |
c19805f8 CB |
1036 | * Use the hardware provided configuration level to decide if the |
1037 | * sample belongs to a guest or host. If that is not available, | |
1038 | * fall back to the following heuristics: | |
1039 | * A non-zero guest program parameter always indicates a guest | |
1040 | * sample. Some early samples or samples from guests without | |
b1685ab9 | 1041 | * lpp usage would be misaccounted to the host. We use the asn |
c19805f8 | 1042 | * value as an addon heuristic to detect most of these guest samples. |
df26c2e8 MS |
1043 | * If the value differs from 0xffff (the host value), we assume to |
1044 | * be a KVM guest. | |
443e802b | 1045 | */ |
3d43b981 | 1046 | switch (basic->CL) { |
c19805f8 CB |
1047 | case 1: /* logical partition */ |
1048 | sde_regs->in_guest = 0; | |
1049 | break; | |
1050 | case 2: /* virtual machine */ | |
443e802b | 1051 | sde_regs->in_guest = 1; |
c19805f8 CB |
1052 | break; |
1053 | default: /* old machine, use heuristics */ | |
3d43b981 | 1054 | if (basic->gpp || basic->prim_asn != 0xffff) |
c19805f8 CB |
1055 | sde_regs->in_guest = 1; |
1056 | break; | |
1057 | } | |
443e802b | 1058 | |
544e8dd7 HB |
1059 | /* |
1060 | * Store the PID value from the sample-data-entry to be | |
1061 | * processed and resolved by cpumsf_output_event_pid(). | |
1062 | */ | |
1063 | data.tid_entry.pid = basic->hpp & LPP_PID_MASK; | |
1064 | ||
8c069ff4 | 1065 | overflow = 0; |
dd127b3b HB |
1066 | if (perf_exclude_event(event, ®s, sde_regs)) |
1067 | goto out; | |
8c069ff4 HB |
1068 | if (perf_event_overflow(event, &data, ®s)) { |
1069 | overflow = 1; | |
1070 | event->pmu->stop(event, 0); | |
8c069ff4 HB |
1071 | } |
1072 | perf_event_update_userpage(event); | |
dd127b3b | 1073 | out: |
8c069ff4 HB |
1074 | return overflow; |
1075 | } | |
1076 | ||
1077 | static void perf_event_count_update(struct perf_event *event, u64 count) | |
1078 | { | |
1079 | local64_add(count, &event->count); | |
1080 | } | |
1081 | ||
3d43b981 PH |
1082 | static void debug_sample_entry(struct hws_basic_entry *sample, |
1083 | struct hws_trailer_entry *te) | |
7e75fc3f HB |
1084 | { |
1085 | debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " | |
3d43b981 PH |
1086 | "sampling data entry: te->f=%i basic.def=%04x (%p)\n", |
1087 | te->f, sample->def, sample); | |
7e75fc3f HB |
1088 | } |
1089 | ||
8c069ff4 HB |
1090 | /* hw_collect_samples() - Walk through a sample-data-block and collect samples |
1091 | * @event: The perf event | |
1092 | * @sdbt: Sample-data-block table | |
1093 | * @overflow: Event overflow counter | |
1094 | * | |
7e75fc3f HB |
1095 | * Walks through a sample-data-block and collects sampling data entries that are |
1096 | * then pushed to the perf event subsystem. Depending on the sampling function, | |
1097 | * there can be either basic-sampling or combined-sampling data entries. A | |
1098 | * combined-sampling data entry consists of a basic- and a diagnostic-sampling | |
1099 | * data entry. The sampling function is determined by the flags in the perf | |
1100 | * event hardware structure. The function always works with a combined-sampling | |
1101 | * data entry but ignores the the diagnostic portion if it is not available. | |
1102 | * | |
1103 | * Note that the implementation focuses on basic-sampling data entries and, if | |
1104 | * such an entry is not valid, the entire combined-sampling data entry is | |
1105 | * ignored. | |
1106 | * | |
1107 | * The overflow variables counts the number of samples that has been discarded | |
1108 | * due to a perf event overflow. | |
8c069ff4 HB |
1109 | */ |
1110 | static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, | |
1111 | unsigned long long *overflow) | |
1112 | { | |
7e75fc3f | 1113 | struct hws_trailer_entry *te; |
3d43b981 | 1114 | struct hws_basic_entry *sample; |
8c069ff4 | 1115 | |
7e75fc3f | 1116 | te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); |
3d43b981 | 1117 | sample = (struct hws_basic_entry *) *sdbt; |
7e75fc3f | 1118 | while ((unsigned long *) sample < (unsigned long *) te) { |
8c069ff4 | 1119 | /* Check for an empty sample */ |
3d43b981 | 1120 | if (!sample->def) |
8c069ff4 HB |
1121 | break; |
1122 | ||
1123 | /* Update perf event period */ | |
1124 | perf_event_count_update(event, SAMPL_RATE(&event->hw)); | |
1125 | ||
3d43b981 PH |
1126 | /* Check whether sample is valid */ |
1127 | if (sample->def == 0x0001) { | |
8c069ff4 HB |
1128 | /* If an event overflow occurred, the PMU is stopped to |
1129 | * throttle event delivery. Remaining sample data is | |
1130 | * discarded. | |
1131 | */ | |
7e75fc3f | 1132 | if (!*overflow) { |
3d43b981 PH |
1133 | /* Check whether sample is consistent */ |
1134 | if (sample->I == 0 && sample->W == 0) { | |
7e75fc3f | 1135 | /* Deliver sample data to perf */ |
3d43b981 PH |
1136 | *overflow = perf_push_sample(event, |
1137 | sample); | |
7e75fc3f HB |
1138 | } |
1139 | } else | |
8c069ff4 HB |
1140 | /* Count discarded samples */ |
1141 | *overflow += 1; | |
7e75fc3f | 1142 | } else { |
3d43b981 | 1143 | debug_sample_entry(sample, te); |
7e75fc3f HB |
1144 | /* Sample slot is not yet written or other record. |
1145 | * | |
1146 | * This condition can occur if the buffer was reused | |
1147 | * from a combined basic- and diagnostic-sampling. | |
1148 | * If only basic-sampling is then active, entries are | |
1149 | * written into the larger diagnostic entries. | |
1150 | * This is typically the case for sample-data-blocks | |
1151 | * that are not full. Stop processing if the first | |
1152 | * invalid format was detected. | |
1153 | */ | |
1154 | if (!te->f) | |
1155 | break; | |
1156 | } | |
8c069ff4 HB |
1157 | |
1158 | /* Reset sample slot and advance to next sample */ | |
3d43b981 PH |
1159 | sample->def = 0; |
1160 | sample++; | |
8c069ff4 HB |
1161 | } |
1162 | } | |
1163 | ||
1164 | /* hw_perf_event_update() - Process sampling buffer | |
1165 | * @event: The perf event | |
1166 | * @flush_all: Flag to also flush partially filled sample-data-blocks | |
1167 | * | |
1168 | * Processes the sampling buffer and create perf event samples. | |
1169 | * The sampling buffer position are retrieved and saved in the TEAR_REG | |
1170 | * register of the specified perf event. | |
1171 | * | |
1172 | * Only full sample-data-blocks are processed. Specify the flash_all flag | |
d7528862 HB |
1173 | * to also walk through partially filled sample-data-blocks. It is ignored |
1174 | * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag | |
1175 | * enforces the processing of full sample-data-blocks only (trailer entries | |
1176 | * with the block-full-indicator bit set). | |
8c069ff4 HB |
1177 | */ |
1178 | static void hw_perf_event_update(struct perf_event *event, int flush_all) | |
1179 | { | |
1180 | struct hw_perf_event *hwc = &event->hw; | |
1181 | struct hws_trailer_entry *te; | |
1182 | unsigned long *sdbt; | |
fcc77f50 | 1183 | unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; |
8c069ff4 HB |
1184 | int done; |
1185 | ||
cbf6948f PH |
1186 | /* |
1187 | * AUX buffer is used when in diagnostic sampling mode. | |
1188 | * No perf events/samples are created. | |
1189 | */ | |
1190 | if (SAMPL_DIAG_MODE(&event->hw)) | |
1191 | return; | |
1192 | ||
d7528862 HB |
1193 | if (flush_all && SDB_FULL_BLOCKS(hwc)) |
1194 | flush_all = 0; | |
1195 | ||
8c069ff4 | 1196 | sdbt = (unsigned long *) TEAR_REG(hwc); |
69f239ed | 1197 | done = event_overflow = sampl_overflow = num_sdb = 0; |
8c069ff4 HB |
1198 | while (!done) { |
1199 | /* Get the trailer entry of the sample-data-block */ | |
1200 | te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); | |
1201 | ||
1202 | /* Leave loop if no more work to do (block full indicator) */ | |
1203 | if (!te->f) { | |
1204 | done = 1; | |
1205 | if (!flush_all) | |
1206 | break; | |
1207 | } | |
1208 | ||
69f239ed HB |
1209 | /* Check the sample overflow count */ |
1210 | if (te->overflow) | |
1211 | /* Account sample overflows and, if a particular limit | |
1212 | * is reached, extend the sampling buffer. | |
1213 | * For details, see sfb_account_overflows(). | |
8c069ff4 | 1214 | */ |
69f239ed | 1215 | sampl_overflow += te->overflow; |
8c069ff4 HB |
1216 | |
1217 | /* Timestamps are valid for full sample-data-blocks only */ | |
1218 | debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " | |
1219 | "overflow=%llu timestamp=0x%llx\n", | |
1220 | sdbt, te->overflow, | |
443d4beb | 1221 | (te->f) ? trailer_timestamp(te) : 0ULL); |
8c069ff4 HB |
1222 | |
1223 | /* Collect all samples from a single sample-data-block and | |
1224 | * flag if an (perf) event overflow happened. If so, the PMU | |
1225 | * is stopped and remaining samples will be discarded. | |
1226 | */ | |
1227 | hw_collect_samples(event, sdbt, &event_overflow); | |
69f239ed | 1228 | num_sdb++; |
8c069ff4 | 1229 | |
fcc77f50 HB |
1230 | /* Reset trailer (using compare-double-and-swap) */ |
1231 | do { | |
1232 | te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; | |
1233 | te_flags |= SDB_TE_ALERT_REQ_MASK; | |
1234 | } while (!cmpxchg_double(&te->flags, &te->overflow, | |
1235 | te->flags, te->overflow, | |
1236 | te_flags, 0ULL)); | |
8c069ff4 HB |
1237 | |
1238 | /* Advance to next sample-data-block */ | |
1239 | sdbt++; | |
1240 | if (is_link_entry(sdbt)) | |
1241 | sdbt = get_next_sdbt(sdbt); | |
1242 | ||
1243 | /* Update event hardware registers */ | |
1244 | TEAR_REG(hwc) = (unsigned long) sdbt; | |
1245 | ||
1246 | /* Stop processing sample-data if all samples of the current | |
1247 | * sample-data-block were flushed even if it was not full. | |
1248 | */ | |
1249 | if (flush_all && done) | |
1250 | break; | |
1251 | ||
1252 | /* If an event overflow happened, discard samples by | |
1253 | * processing any remaining sample-data-blocks. | |
1254 | */ | |
1255 | if (event_overflow) | |
1256 | flush_all = 1; | |
1257 | } | |
1258 | ||
69f239ed HB |
1259 | /* Account sample overflows in the event hardware structure */ |
1260 | if (sampl_overflow) | |
1261 | OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + | |
1262 | sampl_overflow, 1 + num_sdb); | |
8c069ff4 HB |
1263 | if (sampl_overflow || event_overflow) |
1264 | debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " | |
1265 | "overflow stats: sample=%llu event=%llu\n", | |
1266 | sampl_overflow, event_overflow); | |
1267 | } | |
1268 | ||
ca5955cd PH |
/*
 * Helpers for SDB positions in an AUX buffer.
 *
 * AUX_SDB_INDEX():	reduce position @i modulo the number of SDBs of @aux
 * AUX_SDB_NUM():	number of SDBs in the inclusive range [@start, @end],
 *			or 0 if @end lies before @start
 * AUX_SDB_NUM_ALERT():	number of SDBs from aux->head up to aux->alert_mark
 * AUX_SDB_NUM_EMPTY():	number of SDBs from aux->head up to aux->empty_mark
 *
 * All arguments are parenthesized so that expressions can be passed safely.
 * Arguments may still be evaluated more than once.
 */
#define AUX_SDB_INDEX(aux, i) ((i) % (aux)->sfb.num_sdb)
#define AUX_SDB_NUM(aux, start, end) \
	((end) >= (start) ? (end) - (start) + 1 : 0)
#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, (aux)->head, (aux)->alert_mark)
#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, (aux)->head, (aux)->empty_mark)
1273 | ||
1274 | /* | |
1275 | * Get trailer entry by index of SDB. | |
1276 | */ | |
1277 | static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux, | |
1278 | unsigned long index) | |
1279 | { | |
1280 | unsigned long sdb; | |
1281 | ||
1282 | index = AUX_SDB_INDEX(aux, index); | |
1283 | sdb = aux->sdb_index[index]; | |
1284 | return (struct hws_trailer_entry *)trailer_entry_ptr(sdb); | |
1285 | } | |
1286 | ||
1287 | /* | |
1288 | * Finish sampling on the cpu. Called by cpumsf_pmu_del() with pmu | |
1289 | * disabled. Collect the full SDBs in AUX buffer which have not reached | |
1290 | * the point of alert indicator. And ignore the SDBs which are not | |
1291 | * full. | |
1292 | * | |
1293 | * 1. Scan SDBs to see how much data is there and consume them. | |
1294 | * 2. Remove alert indicator in the buffer. | |
1295 | */ | |
1296 | static void aux_output_end(struct perf_output_handle *handle) | |
1297 | { | |
1298 | unsigned long i, range_scan, idx; | |
1299 | struct aux_buffer *aux; | |
1300 | struct hws_trailer_entry *te; | |
1301 | ||
1302 | aux = perf_get_aux(handle); | |
1303 | if (!aux) | |
1304 | return; | |
1305 | ||
1306 | range_scan = AUX_SDB_NUM_ALERT(aux); | |
1307 | for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { | |
1308 | te = aux_sdb_trailer(aux, idx); | |
1309 | if (!(te->flags & SDB_TE_BUFFER_FULL_MASK)) | |
1310 | break; | |
1311 | } | |
1312 | /* i is num of SDBs which are full */ | |
1313 | perf_aux_output_end(handle, i << PAGE_SHIFT); | |
1314 | ||
1315 | /* Remove alert indicators in the buffer */ | |
1316 | te = aux_sdb_trailer(aux, aux->alert_mark); | |
1317 | te->flags &= ~SDB_TE_ALERT_REQ_MASK; | |
1318 | ||
1319 | debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i); | |
1320 | } | |
1321 | ||
1322 | /* | |
1323 | * Start sampling on the CPU. Called by cpumsf_pmu_add() when an event | |
1324 | * is first added to the CPU or rescheduled again to the CPU. It is called | |
1325 | * with pmu disabled. | |
1326 | * | |
1327 | * 1. Reset the trailer of SDBs to get ready for new data. | |
1328 | * 2. Tell the hardware where to put the data by reset the SDBs buffer | |
1329 | * head(tear/dear). | |
1330 | */ | |
1331 | static int aux_output_begin(struct perf_output_handle *handle, | |
1332 | struct aux_buffer *aux, | |
1333 | struct cpu_hw_sf *cpuhw) | |
1334 | { | |
1335 | unsigned long range; | |
1336 | unsigned long i, range_scan, idx; | |
1337 | unsigned long head, base, offset; | |
1338 | struct hws_trailer_entry *te; | |
1339 | ||
1340 | if (WARN_ON_ONCE(handle->head & ~PAGE_MASK)) | |
1341 | return -EINVAL; | |
1342 | ||
1343 | aux->head = handle->head >> PAGE_SHIFT; | |
1344 | range = (handle->size + 1) >> PAGE_SHIFT; | |
1345 | if (range <= 1) | |
1346 | return -ENOMEM; | |
1347 | ||
1348 | /* | |
1349 | * SDBs between aux->head and aux->empty_mark are already ready | |
1350 | * for new data. range_scan is num of SDBs not within them. | |
1351 | */ | |
1352 | if (range > AUX_SDB_NUM_EMPTY(aux)) { | |
1353 | range_scan = range - AUX_SDB_NUM_EMPTY(aux); | |
1354 | idx = aux->empty_mark + 1; | |
1355 | for (i = 0; i < range_scan; i++, idx++) { | |
1356 | te = aux_sdb_trailer(aux, idx); | |
1357 | te->flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; | |
1358 | te->flags = te->flags & ~SDB_TE_ALERT_REQ_MASK; | |
1359 | te->overflow = 0; | |
1360 | } | |
1361 | /* Save the position of empty SDBs */ | |
1362 | aux->empty_mark = aux->head + range - 1; | |
1363 | } | |
1364 | ||
1365 | /* Set alert indicator */ | |
1366 | aux->alert_mark = aux->head + range/2 - 1; | |
1367 | te = aux_sdb_trailer(aux, aux->alert_mark); | |
1368 | te->flags = te->flags | SDB_TE_ALERT_REQ_MASK; | |
1369 | ||
1370 | /* Reset hardware buffer head */ | |
1371 | head = AUX_SDB_INDEX(aux, aux->head); | |
1372 | base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE]; | |
1373 | offset = head % CPUM_SF_SDB_PER_TABLE; | |
1374 | cpuhw->lsctl.tear = base + offset * sizeof(unsigned long); | |
1375 | cpuhw->lsctl.dear = aux->sdb_index[head]; | |
1376 | ||
1377 | debug_sprintf_event(sfdbg, 6, "aux_output_begin: " | |
1378 | "head->alert_mark->empty_mark (num_alert, range)" | |
1379 | "[%lx -> %lx -> %lx] (%lx, %lx) " | |
1380 | "tear index %lx, tear %lx dear %lx\n", | |
1381 | aux->head, aux->alert_mark, aux->empty_mark, | |
1382 | AUX_SDB_NUM_ALERT(aux), range, | |
1383 | head / CPUM_SF_SDB_PER_TABLE, | |
1384 | cpuhw->lsctl.tear, | |
1385 | cpuhw->lsctl.dear); | |
1386 | ||
1387 | return 0; | |
1388 | } | |
1389 | ||
1390 | /* | |
1391 | * Set alert indicator on SDB at index @alert_index while sampler is running. | |
1392 | * | |
1393 | * Return true if successfully. | |
1394 | * Return false if full indicator is already set by hardware sampler. | |
1395 | */ | |
1396 | static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, | |
1397 | unsigned long long *overflow) | |
1398 | { | |
1399 | unsigned long long orig_overflow, orig_flags, new_flags; | |
1400 | struct hws_trailer_entry *te; | |
1401 | ||
1402 | te = aux_sdb_trailer(aux, alert_index); | |
1403 | do { | |
1404 | orig_flags = te->flags; | |
1405 | orig_overflow = te->overflow; | |
1406 | *overflow = orig_overflow; | |
1407 | if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { | |
1408 | /* | |
1409 | * SDB is already set by hardware. | |
1410 | * Abort and try to set somewhere | |
1411 | * behind. | |
1412 | */ | |
1413 | return false; | |
1414 | } | |
1415 | new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK; | |
1416 | } while (!cmpxchg_double(&te->flags, &te->overflow, | |
1417 | orig_flags, orig_overflow, | |
1418 | new_flags, 0ULL)); | |
1419 | return true; | |
1420 | } | |
1421 | ||
1422 | /* | |
1423 | * aux_reset_buffer() - Scan and setup SDBs for new samples | |
1424 | * @aux: The AUX buffer to set | |
1425 | * @range: The range of SDBs to scan started from aux->head | |
1426 | * @overflow: Set to overflow count | |
1427 | * | |
1428 | * Set alert indicator on the SDB at index of aux->alert_mark. If this SDB is | |
1429 | * marked as empty, check if it is already set full by the hardware sampler. | |
1430 | * If yes, that means new data is already there before we can set an alert | |
1431 | * indicator. Caller should try to set alert indicator to some position behind. | |
1432 | * | |
1433 | * Scan the SDBs in AUX buffer from behind aux->empty_mark. They are used | |
1434 | * previously and have already been consumed by user space. Reset these SDBs | |
1435 | * (clear full indicator and alert indicator) for new data. | |
1436 | * If aux->alert_mark fall in this area, just set it. Overflow count is | |
1437 | * recorded while scanning. | |
1438 | * | |
1439 | * SDBs between aux->head and aux->empty_mark are already reset at last time. | |
1440 | * and ready for new samples. So scanning on this area could be skipped. | |
1441 | * | |
1442 | * Return true if alert indicator is set successfully and false if not. | |
1443 | */ | |
1444 | static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, | |
1445 | unsigned long long *overflow) | |
1446 | { | |
1447 | unsigned long long orig_overflow, orig_flags, new_flags; | |
1448 | unsigned long i, range_scan, idx; | |
1449 | struct hws_trailer_entry *te; | |
1450 | ||
1451 | if (range <= AUX_SDB_NUM_EMPTY(aux)) | |
1452 | /* | |
1453 | * No need to scan. All SDBs in range are marked as empty. | |
1454 | * Just set alert indicator. Should check race with hardware | |
1455 | * sampler. | |
1456 | */ | |
1457 | return aux_set_alert(aux, aux->alert_mark, overflow); | |
1458 | ||
1459 | if (aux->alert_mark <= aux->empty_mark) | |
1460 | /* | |
1461 | * Set alert indicator on empty SDB. Should check race | |
1462 | * with hardware sampler. | |
1463 | */ | |
1464 | if (!aux_set_alert(aux, aux->alert_mark, overflow)) | |
1465 | return false; | |
1466 | ||
1467 | /* | |
1468 | * Scan the SDBs to clear full and alert indicator used previously. | |
1469 | * Start scanning from one SDB behind empty_mark. If the new alert | |
1470 | * indicator fall into this range, set it. | |
1471 | */ | |
1472 | range_scan = range - AUX_SDB_NUM_EMPTY(aux); | |
1473 | idx = aux->empty_mark + 1; | |
1474 | for (i = 0; i < range_scan; i++, idx++) { | |
1475 | te = aux_sdb_trailer(aux, idx); | |
1476 | do { | |
1477 | orig_flags = te->flags; | |
1478 | orig_overflow = te->overflow; | |
1479 | new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK; | |
1480 | if (idx == aux->alert_mark) | |
1481 | new_flags |= SDB_TE_ALERT_REQ_MASK; | |
1482 | else | |
1483 | new_flags &= ~SDB_TE_ALERT_REQ_MASK; | |
1484 | } while (!cmpxchg_double(&te->flags, &te->overflow, | |
1485 | orig_flags, orig_overflow, | |
1486 | new_flags, 0ULL)); | |
1487 | *overflow += orig_overflow; | |
1488 | } | |
1489 | ||
1490 | /* Update empty_mark to new position */ | |
1491 | aux->empty_mark = aux->head + range - 1; | |
1492 | ||
1493 | return true; | |
1494 | } | |
1495 | ||
1496 | /* | |
1497 | * Measurement alert handler for diagnostic mode sampling. | |
1498 | */ | |
1499 | static void hw_collect_aux(struct cpu_hw_sf *cpuhw) | |
1500 | { | |
1501 | struct aux_buffer *aux; | |
1502 | int done = 0; | |
1503 | unsigned long range = 0, size; | |
1504 | unsigned long long overflow = 0; | |
1505 | struct perf_output_handle *handle = &cpuhw->handle; | |
1506 | unsigned long num_sdb; | |
1507 | ||
1508 | aux = perf_get_aux(handle); | |
1509 | if (WARN_ON_ONCE(!aux)) | |
1510 | return; | |
1511 | ||
1512 | /* Inform user space new data arrived */ | |
1513 | size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; | |
1514 | perf_aux_output_end(handle, size); | |
1515 | num_sdb = aux->sfb.num_sdb; | |
1516 | ||
1517 | while (!done) { | |
1518 | /* Get an output handle */ | |
1519 | aux = perf_aux_output_begin(handle, cpuhw->event); | |
1520 | if (handle->size == 0) { | |
1521 | pr_err("The AUX buffer with %lu pages for the " | |
1522 | "diagnostic-sampling mode is full\n", | |
1523 | num_sdb); | |
1524 | debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n"); | |
1525 | break; | |
1526 | } | |
1527 | if (WARN_ON_ONCE(!aux)) | |
1528 | return; | |
1529 | ||
1530 | /* Update head and alert_mark to new position */ | |
1531 | aux->head = handle->head >> PAGE_SHIFT; | |
1532 | range = (handle->size + 1) >> PAGE_SHIFT; | |
1533 | if (range == 1) | |
1534 | aux->alert_mark = aux->head; | |
1535 | else | |
1536 | aux->alert_mark = aux->head + range/2 - 1; | |
1537 | ||
1538 | if (aux_reset_buffer(aux, range, &overflow)) { | |
1539 | if (!overflow) { | |
1540 | done = 1; | |
1541 | break; | |
1542 | } | |
1543 | size = range << PAGE_SHIFT; | |
1544 | perf_aux_output_end(&cpuhw->handle, size); | |
1545 | pr_err("Sample data caused the AUX buffer with %lu " | |
1546 | "pages to overflow\n", num_sdb); | |
1547 | debug_sprintf_event(sfdbg, 1, "head %lx range %lx " | |
1548 | "overflow %llx\n", | |
1549 | aux->head, range, overflow); | |
1550 | } else { | |
1551 | size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; | |
1552 | perf_aux_output_end(&cpuhw->handle, size); | |
1553 | debug_sprintf_event(sfdbg, 6, "head %lx alert %lx " | |
1554 | "already full, try another\n", | |
1555 | aux->head, aux->alert_mark); | |
1556 | } | |
1557 | } | |
1558 | ||
1559 | if (done) | |
1560 | debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: " | |
1561 | "[%lx -> %lx -> %lx] (%lx, %lx)\n", | |
1562 | aux->head, aux->alert_mark, aux->empty_mark, | |
1563 | AUX_SDB_NUM_ALERT(aux), range); | |
1564 | } | |
1565 | ||
1566 | /* | |
1567 | * Callback when freeing AUX buffers. | |
1568 | */ | |
1569 | static void aux_buffer_free(void *data) | |
1570 | { | |
1571 | struct aux_buffer *aux = data; | |
1572 | unsigned long i, num_sdbt; | |
1573 | ||
1574 | if (!aux) | |
1575 | return; | |
1576 | ||
1577 | /* Free SDBT. SDB is freed by the caller */ | |
1578 | num_sdbt = aux->sfb.num_sdbt; | |
1579 | for (i = 0; i < num_sdbt; i++) | |
1580 | free_page(aux->sdbt_index[i]); | |
1581 | ||
1582 | kfree(aux->sdbt_index); | |
1583 | kfree(aux->sdb_index); | |
1584 | kfree(aux); | |
1585 | ||
1586 | debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free " | |
1587 | "%lu SDBTs\n", num_sdbt); | |
1588 | } | |
1589 | ||
1590 | /* | |
1591 | * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling | |
1592 | * @cpu: On which to allocate, -1 means current | |
1593 | * @pages: Array of pointers to buffer pages passed from perf core | |
1594 | * @nr_pages: Total pages | |
1595 | * @snapshot: Flag for snapshot mode | |
1596 | * | |
1597 | * This is the callback when setup an event using AUX buffer. Perf tool can | |
1598 | * trigger this by an additional mmap() call on the event. Unlike the buffer | |
1599 | * for basic samples, AUX buffer belongs to the event. It is scheduled with | |
1600 | * the task among online cpus when it is a per-thread event. | |
1601 | * | |
1602 | * Return the private AUX buffer structure if success or NULL if fails. | |
1603 | */ | |
1604 | static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, | |
1605 | bool snapshot) | |
1606 | { | |
1607 | struct sf_buffer *sfb; | |
1608 | struct aux_buffer *aux; | |
1609 | unsigned long *new, *tail; | |
1610 | int i, n_sdbt; | |
1611 | ||
1612 | if (!nr_pages || !pages) | |
1613 | return NULL; | |
1614 | ||
1615 | if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) { | |
1616 | pr_err("AUX buffer size (%i pages) is larger than the " | |
1617 | "maximum sampling buffer limit\n", | |
1618 | nr_pages); | |
1619 | return NULL; | |
1620 | } else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) { | |
1621 | pr_err("AUX buffer size (%i pages) is less than the " | |
1622 | "minimum sampling buffer limit\n", | |
1623 | nr_pages); | |
1624 | return NULL; | |
1625 | } | |
1626 | ||
1627 | /* Allocate aux_buffer struct for the event */ | |
1628 | aux = kmalloc(sizeof(struct aux_buffer), GFP_KERNEL); | |
1629 | if (!aux) | |
1630 | goto no_aux; | |
1631 | sfb = &aux->sfb; | |
1632 | ||
1633 | /* Allocate sdbt_index for fast reference */ | |
1634 | n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE; | |
1635 | aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL); | |
1636 | if (!aux->sdbt_index) | |
1637 | goto no_sdbt_index; | |
1638 | ||
1639 | /* Allocate sdb_index for fast reference */ | |
1640 | aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL); | |
1641 | if (!aux->sdb_index) | |
1642 | goto no_sdb_index; | |
1643 | ||
1644 | /* Allocate the first SDBT */ | |
1645 | sfb->num_sdbt = 0; | |
1646 | sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); | |
1647 | if (!sfb->sdbt) | |
1648 | goto no_sdbt; | |
1649 | aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt; | |
1650 | tail = sfb->tail = sfb->sdbt; | |
1651 | ||
1652 | /* | |
1653 | * Link the provided pages of AUX buffer to SDBT. | |
1654 | * Allocate SDBT if needed. | |
1655 | */ | |
1656 | for (i = 0; i < nr_pages; i++, tail++) { | |
1657 | if (require_table_link(tail)) { | |
1658 | new = (unsigned long *) get_zeroed_page(GFP_KERNEL); | |
1659 | if (!new) | |
1660 | goto no_sdbt; | |
1661 | aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new; | |
1662 | /* Link current page to tail of chain */ | |
1663 | *tail = (unsigned long)(void *) new + 1; | |
1664 | tail = new; | |
1665 | } | |
1666 | /* Tail is the entry in a SDBT */ | |
1667 | *tail = (unsigned long)pages[i]; | |
1668 | aux->sdb_index[i] = (unsigned long)pages[i]; | |
1669 | } | |
1670 | sfb->num_sdb = nr_pages; | |
1671 | ||
1672 | /* Link the last entry in the SDBT to the first SDBT */ | |
1673 | *tail = (unsigned long) sfb->sdbt + 1; | |
1674 | sfb->tail = tail; | |
1675 | ||
1676 | /* | |
1677 | * Initial all SDBs are zeroed. Mark it as empty. | |
1678 | * So there is no need to clear the full indicator | |
1679 | * when this event is first added. | |
1680 | */ | |
1681 | aux->empty_mark = sfb->num_sdb - 1; | |
1682 | ||
1683 | debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs" | |
1684 | " and %lu SDBs\n", | |
1685 | sfb->num_sdbt, sfb->num_sdb); | |
1686 | ||
1687 | return aux; | |
1688 | ||
1689 | no_sdbt: | |
1690 | /* SDBs (AUX buffer pages) are freed by caller */ | |
1691 | for (i = 0; i < sfb->num_sdbt; i++) | |
1692 | free_page(aux->sdbt_index[i]); | |
1693 | kfree(aux->sdb_index); | |
1694 | no_sdb_index: | |
1695 | kfree(aux->sdbt_index); | |
1696 | no_sdbt_index: | |
1697 | kfree(aux); | |
1698 | no_aux: | |
1699 | return NULL; | |
1700 | } | |
1701 | ||
8c069ff4 HB |
/*
 * cpumsf_pmu_read() - read callback of the sampling PMU
 * @event:	Event to read (unused)
 *
 * Intentionally empty: updates are interrupt-driven, so there is nothing
 * to do here.
 */
static void cpumsf_pmu_read(struct perf_event *event)
{
}
1706 | ||
1707 | /* Activate sampling control. | |
1708 | * Next call of pmu_enable() starts sampling. | |
1709 | */ | |
1710 | static void cpumsf_pmu_start(struct perf_event *event, int flags) | |
1711 | { | |
eb7e7d76 | 1712 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
1713 | |
1714 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | |
1715 | return; | |
1716 | ||
1717 | if (flags & PERF_EF_RELOAD) | |
1718 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | |
1719 | ||
1720 | perf_pmu_disable(event->pmu); | |
1721 | event->hw.state = 0; | |
1722 | cpuhw->lsctl.cs = 1; | |
7e75fc3f HB |
1723 | if (SAMPL_DIAG_MODE(&event->hw)) |
1724 | cpuhw->lsctl.cd = 1; | |
8c069ff4 HB |
1725 | perf_pmu_enable(event->pmu); |
1726 | } | |
1727 | ||
1728 | /* Deactivate sampling control. | |
1729 | * Next call of pmu_enable() stops sampling. | |
1730 | */ | |
1731 | static void cpumsf_pmu_stop(struct perf_event *event, int flags) | |
1732 | { | |
eb7e7d76 | 1733 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
1734 | |
1735 | if (event->hw.state & PERF_HES_STOPPED) | |
1736 | return; | |
1737 | ||
1738 | perf_pmu_disable(event->pmu); | |
1739 | cpuhw->lsctl.cs = 0; | |
7e75fc3f | 1740 | cpuhw->lsctl.cd = 0; |
8c069ff4 HB |
1741 | event->hw.state |= PERF_HES_STOPPED; |
1742 | ||
1743 | if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { | |
1744 | hw_perf_event_update(event, 1); | |
1745 | event->hw.state |= PERF_HES_UPTODATE; | |
1746 | } | |
1747 | perf_pmu_enable(event->pmu); | |
1748 | } | |
1749 | ||
1750 | static int cpumsf_pmu_add(struct perf_event *event, int flags) | |
1751 | { | |
eb7e7d76 | 1752 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
cbf6948f | 1753 | struct aux_buffer *aux; |
8c069ff4 HB |
1754 | int err; |
1755 | ||
1756 | if (cpuhw->flags & PMU_F_IN_USE) | |
1757 | return -EAGAIN; | |
1758 | ||
cbf6948f | 1759 | if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt) |
8c069ff4 HB |
1760 | return -EINVAL; |
1761 | ||
1762 | err = 0; | |
1763 | perf_pmu_disable(event->pmu); | |
1764 | ||
1765 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | |
1766 | ||
1767 | /* Set up sampling controls. Always program the sampling register | |
1768 | * using the SDB-table start. Reset TEAR_REG event hardware register | |
1769 | * that is used by hw_perf_event_update() to store the sampling buffer | |
1770 | * position after samples have been flushed. | |
1771 | */ | |
1772 | cpuhw->lsctl.s = 0; | |
1773 | cpuhw->lsctl.h = 1; | |
8c069ff4 | 1774 | cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); |
cbf6948f PH |
1775 | if (!SAMPL_DIAG_MODE(&event->hw)) { |
1776 | cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; | |
1777 | cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; | |
1778 | hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); | |
1779 | } | |
8c069ff4 HB |
1780 | |
1781 | /* Ensure sampling functions are in the disabled state. If disabled, | |
1782 | * switch on sampling enable control. */ | |
7e75fc3f | 1783 | if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { |
8c069ff4 HB |
1784 | err = -EAGAIN; |
1785 | goto out; | |
1786 | } | |
cbf6948f PH |
1787 | if (SAMPL_DIAG_MODE(&event->hw)) { |
1788 | aux = perf_aux_output_begin(&cpuhw->handle, event); | |
1789 | if (!aux) { | |
1790 | err = -EINVAL; | |
1791 | goto out; | |
1792 | } | |
1793 | err = aux_output_begin(&cpuhw->handle, aux, cpuhw); | |
1794 | if (err) | |
1795 | goto out; | |
7e75fc3f | 1796 | cpuhw->lsctl.ed = 1; |
cbf6948f PH |
1797 | } |
1798 | cpuhw->lsctl.es = 1; | |
8c069ff4 HB |
1799 | |
1800 | /* Set in_use flag and store event */ | |
8c069ff4 HB |
1801 | cpuhw->event = event; |
1802 | cpuhw->flags |= PMU_F_IN_USE; | |
1803 | ||
1804 | if (flags & PERF_EF_START) | |
1805 | cpumsf_pmu_start(event, PERF_EF_RELOAD); | |
1806 | out: | |
1807 | perf_event_update_userpage(event); | |
1808 | perf_pmu_enable(event->pmu); | |
1809 | return err; | |
1810 | } | |
1811 | ||
1812 | static void cpumsf_pmu_del(struct perf_event *event, int flags) | |
1813 | { | |
eb7e7d76 | 1814 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
1815 | |
1816 | perf_pmu_disable(event->pmu); | |
1817 | cpumsf_pmu_stop(event, PERF_EF_UPDATE); | |
1818 | ||
1819 | cpuhw->lsctl.es = 0; | |
7e75fc3f | 1820 | cpuhw->lsctl.ed = 0; |
8c069ff4 HB |
1821 | cpuhw->flags &= ~PMU_F_IN_USE; |
1822 | cpuhw->event = NULL; | |
1823 | ||
cbf6948f PH |
1824 | if (SAMPL_DIAG_MODE(&event->hw)) |
1825 | aux_output_end(&cpuhw->handle); | |
8c069ff4 HB |
1826 | perf_event_update_userpage(event); |
1827 | perf_pmu_enable(event->pmu); | |
1828 | } | |
1829 | ||
8c069ff4 | 1830 | CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); |
7e75fc3f | 1831 | CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); |
8c069ff4 HB |
1832 | |
1833 | static struct attribute *cpumsf_pmu_events_attr[] = { | |
1834 | CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), | |
0a648150 | 1835 | NULL, |
8c069ff4 HB |
1836 | NULL, |
1837 | }; | |
1838 | ||
1839 | PMU_FORMAT_ATTR(event, "config:0-63"); | |
1840 | ||
1841 | static struct attribute *cpumsf_pmu_format_attr[] = { | |
1842 | &format_attr_event.attr, | |
1843 | NULL, | |
1844 | }; | |
1845 | ||
1846 | static struct attribute_group cpumsf_pmu_events_group = { | |
1847 | .name = "events", | |
1848 | .attrs = cpumsf_pmu_events_attr, | |
1849 | }; | |
1850 | static struct attribute_group cpumsf_pmu_format_group = { | |
1851 | .name = "format", | |
1852 | .attrs = cpumsf_pmu_format_attr, | |
1853 | }; | |
1854 | static const struct attribute_group *cpumsf_pmu_attr_groups[] = { | |
1855 | &cpumsf_pmu_events_group, | |
1856 | &cpumsf_pmu_format_group, | |
1857 | NULL, | |
1858 | }; | |
1859 | ||
1860 | static struct pmu cpumf_sampling = { | |
1861 | .pmu_enable = cpumsf_pmu_enable, | |
1862 | .pmu_disable = cpumsf_pmu_disable, | |
1863 | ||
1864 | .event_init = cpumsf_pmu_event_init, | |
1865 | .add = cpumsf_pmu_add, | |
1866 | .del = cpumsf_pmu_del, | |
1867 | ||
1868 | .start = cpumsf_pmu_start, | |
1869 | .stop = cpumsf_pmu_stop, | |
1870 | .read = cpumsf_pmu_read, | |
1871 | ||
8c069ff4 | 1872 | .attr_groups = cpumsf_pmu_attr_groups, |
ca5955cd PH |
1873 | |
1874 | .setup_aux = aux_buffer_setup, | |
1875 | .free_aux = aux_buffer_free, | |
8c069ff4 HB |
1876 | }; |
1877 | ||
1878 | static void cpumf_measurement_alert(struct ext_code ext_code, | |
1879 | unsigned int alert, unsigned long unused) | |
1880 | { | |
1881 | struct cpu_hw_sf *cpuhw; | |
1882 | ||
1883 | if (!(alert & CPU_MF_INT_SF_MASK)) | |
1884 | return; | |
1885 | inc_irq_stat(IRQEXT_CMS); | |
eb7e7d76 | 1886 | cpuhw = this_cpu_ptr(&cpu_hw_sf); |
8c069ff4 HB |
1887 | |
1888 | /* Measurement alerts are shared and might happen when the PMU | |
1889 | * is not reserved. Ignore these alerts in this case. */ | |
1890 | if (!(cpuhw->flags & PMU_F_RESERVED)) | |
1891 | return; | |
1892 | ||
1893 | /* The processing below must take care of multiple alert events that | |
1894 | * might be indicated concurrently. */ | |
1895 | ||
1896 | /* Program alert request */ | |
1897 | if (alert & CPU_MF_INT_SF_PRA) { | |
1898 | if (cpuhw->flags & PMU_F_IN_USE) | |
cbf6948f PH |
1899 | if (SAMPL_DIAG_MODE(&cpuhw->event->hw)) |
1900 | hw_collect_aux(cpuhw); | |
1901 | else | |
1902 | hw_perf_event_update(cpuhw->event, 0); | |
8c069ff4 HB |
1903 | else |
1904 | WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); | |
1905 | } | |
1906 | ||
1907 | /* Report measurement alerts only for non-PRA codes */ | |
1908 | if (alert != CPU_MF_INT_SF_PRA) | |
1909 | debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert); | |
1910 | ||
1911 | /* Sampling authorization change request */ | |
1912 | if (alert & CPU_MF_INT_SF_SACA) | |
1913 | qsi(&cpuhw->qsi); | |
1914 | ||
1915 | /* Loss of sample data due to high-priority machine activities */ | |
1916 | if (alert & CPU_MF_INT_SF_LSDA) { | |
1917 | pr_err("Sample data was lost\n"); | |
1918 | cpuhw->flags |= PMU_F_ERR_LSDA; | |
1919 | sf_disable(); | |
1920 | } | |
1921 | ||
1922 | /* Invalid sampling buffer entry */ | |
1923 | if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { | |
1924 | pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n", | |
1925 | alert); | |
1926 | cpuhw->flags |= PMU_F_ERR_IBE; | |
1927 | sf_disable(); | |
1928 | } | |
1929 | } | |
e3d617fe | 1930 | static int cpusf_pmu_setup(unsigned int cpu, int flags) |
8c069ff4 | 1931 | { |
8c069ff4 HB |
1932 | /* Ignore the notification if no events are scheduled on the PMU. |
1933 | * This might be racy... | |
1934 | */ | |
1935 | if (!atomic_read(&num_events)) | |
e3d617fe | 1936 | return 0; |
8c069ff4 | 1937 | |
e3d617fe SAS |
1938 | local_irq_disable(); |
1939 | setup_pmc_cpu(&flags); | |
1940 | local_irq_enable(); | |
1941 | return 0; | |
1942 | } | |
1943 | ||
1944 | static int s390_pmu_sf_online_cpu(unsigned int cpu) | |
1945 | { | |
1946 | return cpusf_pmu_setup(cpu, PMC_INIT); | |
1947 | } | |
1948 | ||
1949 | static int s390_pmu_sf_offline_cpu(unsigned int cpu) | |
1950 | { | |
1951 | return cpusf_pmu_setup(cpu, PMC_RELEASE); | |
8c069ff4 HB |
1952 | } |
1953 | ||
69f239ed HB |
1954 | static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) |
1955 | { | |
1956 | if (!cpum_sf_avail()) | |
1957 | return -ENODEV; | |
1958 | return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); | |
1959 | } | |
1960 | ||
1961 | static int param_set_sfb_size(const char *val, const struct kernel_param *kp) | |
1962 | { | |
1963 | int rc; | |
1964 | unsigned long min, max; | |
1965 | ||
1966 | if (!cpum_sf_avail()) | |
1967 | return -ENODEV; | |
1968 | if (!val || !strlen(val)) | |
1969 | return -EINVAL; | |
1970 | ||
1971 | /* Valid parameter values: "min,max" or "max" */ | |
1972 | min = CPUM_SF_MIN_SDB; | |
1973 | max = CPUM_SF_MAX_SDB; | |
1974 | if (strchr(val, ',')) | |
1975 | rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL; | |
1976 | else | |
1977 | rc = kstrtoul(val, 10, &max); | |
1978 | ||
1979 | if (min < 2 || min >= max || max > get_num_physpages()) | |
1980 | rc = -EINVAL; | |
1981 | if (rc) | |
1982 | return rc; | |
1983 | ||
1984 | sfb_set_limits(min, max); | |
7e75fc3f HB |
1985 | pr_info("The sampling buffer limits have changed to: " |
1986 | "min=%lu max=%lu (diag=x%lu)\n", | |
1987 | CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); | |
69f239ed HB |
1988 | return 0; |
1989 | } | |
1990 | ||
1991 | #define param_check_sfb_size(name, p) __param_check(name, p, void) | |
9c27847d | 1992 | static const struct kernel_param_ops param_ops_sfb_size = { |
69f239ed HB |
1993 | .set = param_set_sfb_size, |
1994 | .get = param_get_sfb_size, | |
1995 | }; | |
1996 | ||
7e75fc3f HB |
1997 | #define RS_INIT_FAILURE_QSI 0x0001 |
1998 | #define RS_INIT_FAILURE_BSDES 0x0002 | |
1999 | #define RS_INIT_FAILURE_ALRT 0x0003 | |
2000 | #define RS_INIT_FAILURE_PERF 0x0004 | |
2001 | static void __init pr_cpumsf_err(unsigned int reason) | |
2002 | { | |
2003 | pr_err("Sampling facility support for perf is not available: " | |
2004 | "reason=%04x\n", reason); | |
2005 | } | |
2006 | ||
8c069ff4 HB |
2007 | static int __init init_cpum_sampling_pmu(void) |
2008 | { | |
7e75fc3f | 2009 | struct hws_qsi_info_block si; |
8c069ff4 HB |
2010 | int err; |
2011 | ||
2012 | if (!cpum_sf_avail()) | |
2013 | return -ENODEV; | |
2014 | ||
7e75fc3f HB |
2015 | memset(&si, 0, sizeof(si)); |
2016 | if (qsi(&si)) { | |
2017 | pr_cpumsf_err(RS_INIT_FAILURE_QSI); | |
2018 | return -ENODEV; | |
2019 | } | |
2020 | ||
9232c3c7 HB |
2021 | if (!si.as && !si.ad) |
2022 | return -ENODEV; | |
2023 | ||
7e75fc3f HB |
2024 | if (si.bsdes != sizeof(struct hws_basic_entry)) { |
2025 | pr_cpumsf_err(RS_INIT_FAILURE_BSDES); | |
2026 | return -EINVAL; | |
2027 | } | |
2028 | ||
0a648150 | 2029 | if (si.ad) { |
7e75fc3f | 2030 | sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); |
0a648150 HB |
2031 | cpumsf_pmu_events_attr[1] = |
2032 | CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); | |
2033 | } | |
7e75fc3f | 2034 | |
8c069ff4 HB |
2035 | sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); |
2036 | if (!sfdbg) | |
2037 | pr_err("Registering for s390dbf failed\n"); | |
2038 | debug_register_view(sfdbg, &debug_sprintf_view); | |
2039 | ||
1dad093b TH |
2040 | err = register_external_irq(EXT_IRQ_MEASURE_ALERT, |
2041 | cpumf_measurement_alert); | |
8c069ff4 | 2042 | if (err) { |
7e75fc3f | 2043 | pr_cpumsf_err(RS_INIT_FAILURE_ALRT); |
8c069ff4 HB |
2044 | goto out; |
2045 | } | |
2046 | ||
2047 | err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); | |
2048 | if (err) { | |
7e75fc3f | 2049 | pr_cpumsf_err(RS_INIT_FAILURE_PERF); |
1dad093b TH |
2050 | unregister_external_irq(EXT_IRQ_MEASURE_ALERT, |
2051 | cpumf_measurement_alert); | |
8c069ff4 HB |
2052 | goto out; |
2053 | } | |
e3d617fe | 2054 | |
73c1b41e | 2055 | cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online", |
e3d617fe | 2056 | s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu); |
8c069ff4 HB |
2057 | out: |
2058 | return err; | |
2059 | } | |
2060 | arch_initcall(init_cpum_sampling_pmu); | |
69f239ed | 2061 | core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); |