Commit | Line | Data |
---|---|---|
90e457f7 AH |
1 | /* |
2 | * intel_pt.c: Intel Processor Trace support | |
3 | * Copyright (c) 2013-2015, Intel Corporation. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms and conditions of the GNU General Public License, | |
7 | * version 2, as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope it will be useful, but WITHOUT | |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
12 | * more details. | |
13 | * | |
14 | */ | |
15 | ||
16 | #include <stdbool.h> | |
17 | #include <linux/kernel.h> | |
18 | #include <linux/types.h> | |
19 | #include <linux/bitops.h> | |
20 | #include <linux/log2.h> | |
11fa7cb8 | 21 | #include <cpuid.h> |
90e457f7 AH |
22 | |
23 | #include "../../perf.h" | |
24 | #include "../../util/session.h" | |
25 | #include "../../util/event.h" | |
26 | #include "../../util/evlist.h" | |
27 | #include "../../util/evsel.h" | |
28 | #include "../../util/cpumap.h" | |
4b6ab94e | 29 | #include <subcmd/parse-options.h> |
90e457f7 AH |
30 | #include "../../util/parse-events.h" |
31 | #include "../../util/pmu.h" | |
32 | #include "../../util/debug.h" | |
33 | #include "../../util/auxtrace.h" | |
34 | #include "../../util/tsc.h" | |
35 | #include "../../util/intel-pt.h" | |
36 | ||
/* Size helpers: convert a count of KiB / MiB into a plain number (x * 1024...). */
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
/* All-ones masks one less than the corresponding KiB(x) / MiB(x) value. */
#define KiB_MASK(x) (KiB(x) - 1)
#define MiB_MASK(x) (MiB(x) - 1)

#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4)

#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60)

/*
 * Margin added to the PSB period when intel_pt_recording_options() checks
 * whether the snapshot size may be too small.
 */
#define INTEL_PT_PSB_PERIOD_NEAR 256
47 | ||
/*
 * Per-mmap bookkeeping used in snapshot mode to decide whether the AUX buffer
 * has wrapped since the previous snapshot.
 */
struct intel_pt_snapshot_ref {
	void *ref_buf;		/* copy of trace bytes saved by intel_pt_copy_ref() */
	size_t ref_offset;	/* buffer offset that ref_buf is compared against */
	bool wrapped;		/* latched once intel_pt_first_wrap() reports a wrap */
};
53 | ||
/*
 * Intel PT recording state.  The generic auxtrace_record is embedded so the
 * callbacks can recover this structure with container_of().
 */
struct intel_pt_recording {
	struct auxtrace_record itr;
	struct perf_pmu *intel_pt_pmu;
	/* 0 = no switch tracking; 1..3 as assigned by intel_pt_recording_options() */
	int have_sched_switch;
	struct perf_evlist *evlist;
	bool snapshot_mode;
	/* set once snapshot_ref_buf_size has been computed */
	bool snapshot_init_done;
	/* size parsed from the snapshot option string (0 if none given) */
	size_t snapshot_size;
	/* size of each reference buffer; 0 means reference buffers are not used */
	size_t snapshot_ref_buf_size;
	/* number of entries allocated in snapshot_refs */
	int snapshot_ref_cnt;
	/* array indexed by mmap index */
	struct intel_pt_snapshot_ref *snapshot_refs;
};
66 | ||
67 | static int intel_pt_parse_terms_with_default(struct list_head *formats, | |
68 | const char *str, | |
69 | u64 *config) | |
70 | { | |
71 | struct list_head *terms; | |
72 | struct perf_event_attr attr = { .size = 0, }; | |
73 | int err; | |
74 | ||
75 | terms = malloc(sizeof(struct list_head)); | |
76 | if (!terms) | |
77 | return -ENOMEM; | |
78 | ||
79 | INIT_LIST_HEAD(terms); | |
80 | ||
81 | err = parse_events_terms(terms, str); | |
82 | if (err) | |
83 | goto out_free; | |
84 | ||
85 | attr.config = *config; | |
86 | err = perf_pmu__config_terms(formats, &attr, terms, true, NULL); | |
87 | if (err) | |
88 | goto out_free; | |
89 | ||
90 | *config = attr.config; | |
91 | out_free: | |
2146afc6 | 92 | parse_events_terms__delete(terms); |
90e457f7 AH |
93 | return err; |
94 | } | |
95 | ||
96 | static int intel_pt_parse_terms(struct list_head *formats, const char *str, | |
97 | u64 *config) | |
98 | { | |
99 | *config = 0; | |
100 | return intel_pt_parse_terms_with_default(formats, str, config); | |
101 | } | |
102 | ||
/*
 * Gather the bits of @bits that lie under the set bits of @mask and pack them
 * together into the low-order bits of the result, keeping their relative
 * order (the highest selected bit ends up most significant).
 */
static u64 intel_pt_masked_bits(u64 mask, u64 bits)
{
	u64 packed = 0;
	int bit;

	/* Walk from the top bit down so earlier (higher) bits are shifted up. */
	for (bit = 63; bit >= 0; bit--) {
		if (!((mask >> bit) & 1))
			continue;
		packed = (packed << 1) | ((bits >> bit) & 1);
	}

	return packed;
}
121 | ||
122 | static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, | |
123 | struct perf_evlist *evlist, u64 *res) | |
124 | { | |
125 | struct perf_evsel *evsel; | |
126 | u64 mask; | |
127 | ||
128 | *res = 0; | |
129 | ||
130 | mask = perf_pmu__format_bits(&intel_pt_pmu->format, str); | |
131 | if (!mask) | |
132 | return -EINVAL; | |
133 | ||
e5cadb93 | 134 | evlist__for_each_entry(evlist, evsel) { |
bc9b6bf0 AH |
135 | if (evsel->attr.type == intel_pt_pmu->type) { |
136 | *res = intel_pt_masked_bits(mask, evsel->attr.config); | |
137 | return 0; | |
138 | } | |
139 | } | |
140 | ||
141 | return -EINVAL; | |
142 | } | |
143 | ||
144 | static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu, | |
145 | struct perf_evlist *evlist) | |
146 | { | |
147 | u64 val; | |
148 | int err, topa_multiple_entries; | |
149 | size_t psb_period; | |
150 | ||
151 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries", | |
152 | "%d", &topa_multiple_entries) != 1) | |
153 | topa_multiple_entries = 0; | |
154 | ||
155 | /* | |
156 | * Use caps/topa_multiple_entries to indicate early hardware that had | |
157 | * extra frequent PSBs. | |
158 | */ | |
159 | if (!topa_multiple_entries) { | |
160 | psb_period = 256; | |
161 | goto out; | |
162 | } | |
163 | ||
164 | err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val); | |
165 | if (err) | |
166 | val = 0; | |
167 | ||
168 | psb_period = 1 << (val + 11); | |
169 | out: | |
170 | pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period); | |
171 | return psb_period; | |
172 | } | |
173 | ||
/*
 * Choose a set bit of @bits as close as possible to position @target.
 *
 * Returns the highest set bit position that is <= @target; if there is none,
 * the lowest set bit position above @target; or -1 when @bits is 0.
 */
static int intel_pt_pick_bit(int bits, int target)
{
	int best = -1;
	int idx = 0;

	while (bits) {
		if (bits & 1) {
			if (best < 0 || idx <= target)
				best = idx;
			/* Nothing at or beyond the target can improve the choice. */
			if (idx >= target)
				return best;
		}
		bits >>= 1;
		idx++;
	}

	return best;
}
189 | ||
190 | static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) | |
191 | { | |
bc9b6bf0 | 192 | char buf[256]; |
b45fc0bf | 193 | int mtc, mtc_periods = 0, mtc_period; |
bc9b6bf0 AH |
194 | int psb_cyc, psb_periods, psb_period; |
195 | int pos = 0; | |
90e457f7 AH |
196 | u64 config; |
197 | ||
bc9b6bf0 AH |
198 | pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); |
199 | ||
b45fc0bf AH |
200 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d", |
201 | &mtc) != 1) | |
202 | mtc = 1; | |
203 | ||
204 | if (mtc) { | |
205 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x", | |
206 | &mtc_periods) != 1) | |
207 | mtc_periods = 0; | |
208 | if (mtc_periods) { | |
209 | mtc_period = intel_pt_pick_bit(mtc_periods, 3); | |
210 | pos += scnprintf(buf + pos, sizeof(buf) - pos, | |
211 | ",mtc,mtc_period=%d", mtc_period); | |
212 | } | |
213 | } | |
214 | ||
bc9b6bf0 AH |
215 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d", |
216 | &psb_cyc) != 1) | |
217 | psb_cyc = 1; | |
218 | ||
b45fc0bf | 219 | if (psb_cyc && mtc_periods) { |
bc9b6bf0 AH |
220 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x", |
221 | &psb_periods) != 1) | |
222 | psb_periods = 0; | |
223 | if (psb_periods) { | |
224 | psb_period = intel_pt_pick_bit(psb_periods, 3); | |
225 | pos += scnprintf(buf + pos, sizeof(buf) - pos, | |
226 | ",psb_period=%d", psb_period); | |
227 | } | |
228 | } | |
229 | ||
230 | pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); | |
231 | ||
232 | intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); | |
233 | ||
90e457f7 AH |
234 | return config; |
235 | } | |
236 | ||
237 | static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr, | |
238 | struct record_opts *opts, | |
239 | const char *str) | |
240 | { | |
241 | struct intel_pt_recording *ptr = | |
242 | container_of(itr, struct intel_pt_recording, itr); | |
243 | unsigned long long snapshot_size = 0; | |
244 | char *endptr; | |
245 | ||
246 | if (str) { | |
247 | snapshot_size = strtoull(str, &endptr, 0); | |
248 | if (*endptr || snapshot_size > SIZE_MAX) | |
249 | return -1; | |
250 | } | |
251 | ||
252 | opts->auxtrace_snapshot_mode = true; | |
253 | opts->auxtrace_snapshot_size = snapshot_size; | |
254 | ||
255 | ptr->snapshot_size = snapshot_size; | |
256 | ||
257 | return 0; | |
258 | } | |
259 | ||
260 | struct perf_event_attr * | |
261 | intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) | |
262 | { | |
263 | struct perf_event_attr *attr; | |
264 | ||
265 | attr = zalloc(sizeof(struct perf_event_attr)); | |
266 | if (!attr) | |
267 | return NULL; | |
268 | ||
269 | attr->config = intel_pt_default_config(intel_pt_pmu); | |
270 | ||
271 | intel_pt_pmu->selectable = true; | |
272 | ||
273 | return attr; | |
274 | } | |
275 | ||
14a05e13 MP |
276 | static size_t |
277 | intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused, | |
278 | struct perf_evlist *evlist __maybe_unused) | |
90e457f7 AH |
279 | { |
280 | return INTEL_PT_AUXTRACE_PRIV_SIZE; | |
281 | } | |
282 | ||
11fa7cb8 AH |
283 | static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) |
284 | { | |
285 | unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; | |
286 | ||
287 | __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); | |
288 | *n = ebx; | |
289 | *d = eax; | |
290 | } | |
291 | ||
90e457f7 AH |
292 | static int intel_pt_info_fill(struct auxtrace_record *itr, |
293 | struct perf_session *session, | |
294 | struct auxtrace_info_event *auxtrace_info, | |
295 | size_t priv_size) | |
296 | { | |
297 | struct intel_pt_recording *ptr = | |
298 | container_of(itr, struct intel_pt_recording, itr); | |
299 | struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; | |
300 | struct perf_event_mmap_page *pc; | |
301 | struct perf_tsc_conversion tc = { .time_mult = 0, }; | |
302 | bool cap_user_time_zero = false, per_cpu_mmaps; | |
11fa7cb8 AH |
303 | u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; |
304 | u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; | |
90e457f7 AH |
305 | int err; |
306 | ||
307 | if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE) | |
308 | return -EINVAL; | |
309 | ||
310 | intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); | |
311 | intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp", | |
312 | &noretcomp_bit); | |
11fa7cb8 AH |
313 | intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit); |
314 | mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format, | |
315 | "mtc_period"); | |
316 | intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit); | |
317 | ||
318 | intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); | |
90e457f7 AH |
319 | |
320 | if (!session->evlist->nr_mmaps) | |
321 | return -EINVAL; | |
322 | ||
323 | pc = session->evlist->mmap[0].base; | |
324 | if (pc) { | |
325 | err = perf_read_tsc_conversion(pc, &tc); | |
326 | if (err) { | |
327 | if (err != -EOPNOTSUPP) | |
328 | return err; | |
329 | } else { | |
330 | cap_user_time_zero = tc.time_mult != 0; | |
331 | } | |
332 | if (!cap_user_time_zero) | |
333 | ui__warning("Intel Processor Trace: TSC not available\n"); | |
334 | } | |
335 | ||
336 | per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus); | |
337 | ||
338 | auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; | |
339 | auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; | |
340 | auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift; | |
341 | auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult; | |
342 | auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero; | |
343 | auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero; | |
344 | auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit; | |
345 | auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit; | |
346 | auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch; | |
347 | auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode; | |
348 | auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps; | |
11fa7cb8 AH |
349 | auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit; |
350 | auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits; | |
351 | auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n; | |
352 | auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; | |
353 | auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; | |
90e457f7 AH |
354 | |
355 | return 0; | |
356 | } | |
357 | ||
358 | static int intel_pt_track_switches(struct perf_evlist *evlist) | |
359 | { | |
360 | const char *sched_switch = "sched:sched_switch"; | |
361 | struct perf_evsel *evsel; | |
362 | int err; | |
363 | ||
364 | if (!perf_evlist__can_select_event(evlist, sched_switch)) | |
365 | return -EPERM; | |
366 | ||
367 | err = parse_events(evlist, sched_switch, NULL); | |
368 | if (err) { | |
369 | pr_debug2("%s: failed to parse %s, error %d\n", | |
370 | __func__, sched_switch, err); | |
371 | return err; | |
372 | } | |
373 | ||
374 | evsel = perf_evlist__last(evlist); | |
375 | ||
376 | perf_evsel__set_sample_bit(evsel, CPU); | |
377 | perf_evsel__set_sample_bit(evsel, TIME); | |
378 | ||
379 | evsel->system_wide = true; | |
380 | evsel->no_aux_samples = true; | |
381 | evsel->immediate = true; | |
382 | ||
383 | return 0; | |
384 | } | |
385 | ||
bc9b6bf0 AH |
386 | static void intel_pt_valid_str(char *str, size_t len, u64 valid) |
387 | { | |
388 | unsigned int val, last = 0, state = 1; | |
389 | int p = 0; | |
390 | ||
391 | str[0] = '\0'; | |
392 | ||
393 | for (val = 0; val <= 64; val++, valid >>= 1) { | |
394 | if (valid & 1) { | |
395 | last = val; | |
396 | switch (state) { | |
397 | case 0: | |
398 | p += scnprintf(str + p, len - p, ","); | |
399 | /* Fall through */ | |
400 | case 1: | |
401 | p += scnprintf(str + p, len - p, "%u", val); | |
402 | state = 2; | |
403 | break; | |
404 | case 2: | |
405 | state = 3; | |
406 | break; | |
407 | case 3: | |
408 | state = 4; | |
409 | break; | |
410 | default: | |
411 | break; | |
412 | } | |
413 | } else { | |
414 | switch (state) { | |
415 | case 3: | |
416 | p += scnprintf(str + p, len - p, ",%u", last); | |
417 | state = 0; | |
418 | break; | |
419 | case 4: | |
420 | p += scnprintf(str + p, len - p, "-%u", last); | |
421 | state = 0; | |
422 | break; | |
423 | default: | |
424 | break; | |
425 | } | |
426 | if (state != 1) | |
427 | state = 0; | |
428 | } | |
429 | } | |
430 | } | |
431 | ||
432 | static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, | |
433 | const char *caps, const char *name, | |
434 | const char *supported, u64 config) | |
435 | { | |
436 | char valid_str[256]; | |
437 | unsigned int shift; | |
438 | unsigned long long valid; | |
439 | u64 bits; | |
440 | int ok; | |
441 | ||
442 | if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1) | |
443 | valid = 0; | |
444 | ||
445 | if (supported && | |
446 | perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok) | |
447 | valid = 0; | |
448 | ||
449 | valid |= 1; | |
450 | ||
451 | bits = perf_pmu__format_bits(&intel_pt_pmu->format, name); | |
452 | ||
453 | config &= bits; | |
454 | ||
455 | for (shift = 0; bits && !(bits & 1); shift++) | |
456 | bits >>= 1; | |
457 | ||
458 | config >>= shift; | |
459 | ||
460 | if (config > 63) | |
461 | goto out_err; | |
462 | ||
463 | if (valid & (1 << config)) | |
464 | return 0; | |
465 | out_err: | |
466 | intel_pt_valid_str(valid_str, sizeof(valid_str), valid); | |
467 | pr_err("Invalid %s for %s. Valid values are: %s\n", | |
468 | name, INTEL_PT_PMU_NAME, valid_str); | |
469 | return -EINVAL; | |
470 | } | |
471 | ||
472 | static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, | |
473 | struct perf_evsel *evsel) | |
474 | { | |
b45fc0bf AH |
475 | int err; |
476 | ||
bc9b6bf0 AH |
477 | if (!evsel) |
478 | return 0; | |
479 | ||
0de802ab AH |
480 | err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", |
481 | "cyc_thresh", "caps/psb_cyc", | |
482 | evsel->attr.config); | |
483 | if (err) | |
484 | return err; | |
485 | ||
b45fc0bf AH |
486 | err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods", |
487 | "mtc_period", "caps/mtc", | |
488 | evsel->attr.config); | |
489 | if (err) | |
490 | return err; | |
491 | ||
bc9b6bf0 AH |
492 | return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods", |
493 | "psb_period", "caps/psb_cyc", | |
494 | evsel->attr.config); | |
495 | } | |
496 | ||
90e457f7 AH |
/*
 * Validate and complete the recording options for an Intel PT session.
 *
 * Finds the (single) Intel PT event in @evlist, validates its config, fills
 * in default AUX/snapshot buffer sizes, arranges for context-switch tracking
 * where possible, moves the Intel PT event to the front of the list and adds
 * a dummy tracking event.
 *
 * Returns 0 on success (including when no Intel PT event is present), or a
 * negative error code.
 */
static int intel_pt_recording_options(struct auxtrace_record *itr,
				      struct perf_evlist *evlist,
				      struct record_opts *opts)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	bool have_timing_info, need_immediate = false;
	struct perf_evsel *evsel, *intel_pt_evsel = NULL;
	const struct cpu_map *cpus = evlist->cpus;
	bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
	u64 tsc_bit;
	int err;

	ptr->evlist = evlist;
	ptr->snapshot_mode = opts->auxtrace_snapshot_mode;

	/* Locate the Intel PT event; more than one is an error. */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == intel_pt_pmu->type) {
			if (intel_pt_evsel) {
				pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
				return -EINVAL;
			}
			evsel->attr.freq = 0;
			evsel->attr.sample_period = 1;
			intel_pt_evsel = evsel;
			opts->full_auxtrace = true;
		}
	}

	if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
		pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
		return -EINVAL;
	}

	if (opts->use_clockid) {
		pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
		return -EINVAL;
	}

	/* Nothing more to do when no AUX area tracing was requested. */
	if (!opts->full_auxtrace)
		return 0;

	err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
	if (err)
		return err;

	/* Set default sizes for snapshot mode */
	if (opts->auxtrace_snapshot_mode) {
		size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);

		if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
			if (privileged) {
				opts->auxtrace_mmap_pages = MiB(4) / page_size;
			} else {
				opts->auxtrace_mmap_pages = KiB(128) / page_size;
				if (opts->mmap_pages == UINT_MAX)
					opts->mmap_pages = KiB(256) / page_size;
			}
		} else if (!opts->auxtrace_mmap_pages && !privileged &&
			   opts->mmap_pages == UINT_MAX) {
			opts->mmap_pages = KiB(256) / page_size;
		}
		/* Derive whichever of snapshot size / mmap pages is still unset. */
		if (!opts->auxtrace_snapshot_size)
			opts->auxtrace_snapshot_size =
				opts->auxtrace_mmap_pages * (size_t)page_size;
		if (!opts->auxtrace_mmap_pages) {
			size_t sz = opts->auxtrace_snapshot_size;

			sz = round_up(sz, page_size) / page_size;
			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
		}
		if (opts->auxtrace_snapshot_size >
				opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}
		pr_debug2("Intel PT snapshot size: %zu\n",
			  opts->auxtrace_snapshot_size);
		if (psb_period &&
		    opts->auxtrace_snapshot_size <= psb_period +
						  INTEL_PT_PSB_PERIOD_NEAR)
			ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
				    opts->auxtrace_snapshot_size, psb_period);
	}

	/* Set default sizes for full trace mode */
	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz;

		if (opts->auxtrace_snapshot_mode)
			min_sz = KiB(4);
		else
			min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);

	/* Timing information is available only if the tsc config bit is set. */
	if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit))
		have_timing_info = true;
	else
		have_timing_info = false;

	/*
	 * Per-cpu recording needs sched_switch events to distinguish different
	 * threads.
	 */
	if (have_timing_info && !cpu_map__empty(cpus)) {
		if (perf_can_record_switch_events()) {
			bool cpu_wide = !target__none(&opts->target) &&
					!target__has_task(&opts->target);

			if (!cpu_wide && perf_can_record_cpu_wide()) {
				struct perf_evsel *switch_evsel;

				/*
				 * Add a separate system-wide dummy event that
				 * records context switches.
				 */
				err = parse_events(evlist, "dummy:u", NULL);
				if (err)
					return err;

				switch_evsel = perf_evlist__last(evlist);

				switch_evsel->attr.freq = 0;
				switch_evsel->attr.sample_period = 1;
				switch_evsel->attr.context_switch = 1;

				switch_evsel->system_wide = true;
				switch_evsel->no_aux_samples = true;
				switch_evsel->immediate = true;

				perf_evsel__set_sample_bit(switch_evsel, TID);
				perf_evsel__set_sample_bit(switch_evsel, TIME);
				perf_evsel__set_sample_bit(switch_evsel, CPU);

				opts->record_switch_events = false;
				ptr->have_sched_switch = 3;
			} else {
				opts->record_switch_events = true;
				need_immediate = true;
				if (cpu_wide)
					ptr->have_sched_switch = 3;
				else
					ptr->have_sched_switch = 2;
			}
		} else {
			/* Fall back to the sched:sched_switch tracepoint. */
			err = intel_pt_track_switches(evlist);
			if (err == -EPERM)
				pr_debug2("Unable to select sched:sched_switch\n");
			else if (err)
				return err;
			else
				ptr->have_sched_switch = 1;
		}
	}

	if (intel_pt_evsel) {
		/*
		 * To obtain the auxtrace buffer file descriptor, the auxtrace
		 * event must come first.
		 */
		perf_evlist__to_front(evlist, intel_pt_evsel);
		/*
		 * In the case of per-cpu mmaps, we need the CPU on the
		 * AUX event.
		 */
		if (!cpu_map__empty(cpus))
			perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
	}

	/* Add dummy event to keep tracking */
	if (opts->full_auxtrace) {
		struct perf_evsel *tracking_evsel;

		err = parse_events(evlist, "dummy:u", NULL);
		if (err)
			return err;

		tracking_evsel = perf_evlist__last(evlist);

		perf_evlist__set_tracking_event(evlist, tracking_evsel);

		tracking_evsel->attr.freq = 0;
		tracking_evsel->attr.sample_period = 1;

		if (need_immediate)
			tracking_evsel->immediate = true;

		/* In per-cpu case, always need the time of mmap events etc */
		if (!cpu_map__empty(cpus)) {
			perf_evsel__set_sample_bit(tracking_evsel, TIME);
			/* And the CPU for switch events */
			perf_evsel__set_sample_bit(tracking_evsel, CPU);
		}
	}

	/*
	 * Warn the user when we do not have enough information to decode i.e.
	 * per-cpu with no sched_switch (except workload-only).
	 */
	if (!ptr->have_sched_switch && !cpu_map__empty(cpus) &&
	    !target__none(&opts->target))
		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");

	return 0;
}
725 | ||
726 | static int intel_pt_snapshot_start(struct auxtrace_record *itr) | |
727 | { | |
728 | struct intel_pt_recording *ptr = | |
729 | container_of(itr, struct intel_pt_recording, itr); | |
730 | struct perf_evsel *evsel; | |
731 | ||
e5cadb93 | 732 | evlist__for_each_entry(ptr->evlist, evsel) { |
90e457f7 | 733 | if (evsel->attr.type == ptr->intel_pt_pmu->type) |
d2190a80 | 734 | return perf_evsel__disable(evsel); |
90e457f7 AH |
735 | } |
736 | return -EINVAL; | |
737 | } | |
738 | ||
739 | static int intel_pt_snapshot_finish(struct auxtrace_record *itr) | |
740 | { | |
741 | struct intel_pt_recording *ptr = | |
742 | container_of(itr, struct intel_pt_recording, itr); | |
743 | struct perf_evsel *evsel; | |
744 | ||
e5cadb93 | 745 | evlist__for_each_entry(ptr->evlist, evsel) { |
90e457f7 | 746 | if (evsel->attr.type == ptr->intel_pt_pmu->type) |
d2190a80 | 747 | return perf_evsel__enable(evsel); |
90e457f7 AH |
748 | } |
749 | return -EINVAL; | |
750 | } | |
751 | ||
752 | static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx) | |
753 | { | |
754 | const size_t sz = sizeof(struct intel_pt_snapshot_ref); | |
755 | int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2; | |
756 | struct intel_pt_snapshot_ref *refs; | |
757 | ||
758 | if (!new_cnt) | |
759 | new_cnt = 16; | |
760 | ||
761 | while (new_cnt <= idx) | |
762 | new_cnt *= 2; | |
763 | ||
764 | refs = calloc(new_cnt, sz); | |
765 | if (!refs) | |
766 | return -ENOMEM; | |
767 | ||
768 | memcpy(refs, ptr->snapshot_refs, cnt * sz); | |
769 | ||
770 | ptr->snapshot_refs = refs; | |
771 | ptr->snapshot_ref_cnt = new_cnt; | |
772 | ||
773 | return 0; | |
774 | } | |
775 | ||
776 | static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr) | |
777 | { | |
778 | int i; | |
779 | ||
780 | for (i = 0; i < ptr->snapshot_ref_cnt; i++) | |
781 | zfree(&ptr->snapshot_refs[i].ref_buf); | |
782 | zfree(&ptr->snapshot_refs); | |
783 | } | |
784 | ||
785 | static void intel_pt_recording_free(struct auxtrace_record *itr) | |
786 | { | |
787 | struct intel_pt_recording *ptr = | |
788 | container_of(itr, struct intel_pt_recording, itr); | |
789 | ||
790 | intel_pt_free_snapshot_refs(ptr); | |
791 | free(ptr); | |
792 | } | |
793 | ||
794 | static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx, | |
795 | size_t snapshot_buf_size) | |
796 | { | |
797 | size_t ref_buf_size = ptr->snapshot_ref_buf_size; | |
798 | void *ref_buf; | |
799 | ||
800 | ref_buf = zalloc(ref_buf_size); | |
801 | if (!ref_buf) | |
802 | return -ENOMEM; | |
803 | ||
804 | ptr->snapshot_refs[idx].ref_buf = ref_buf; | |
805 | ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size; | |
806 | ||
807 | return 0; | |
808 | } | |
809 | ||
810 | static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr, | |
811 | size_t snapshot_buf_size) | |
812 | { | |
813 | const size_t max_size = 256 * 1024; | |
814 | size_t buf_size = 0, psb_period; | |
815 | ||
816 | if (ptr->snapshot_size <= 64 * 1024) | |
817 | return 0; | |
818 | ||
819 | psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist); | |
820 | if (psb_period) | |
821 | buf_size = psb_period * 2; | |
822 | ||
823 | if (!buf_size || buf_size > max_size) | |
824 | buf_size = max_size; | |
825 | ||
826 | if (buf_size >= snapshot_buf_size) | |
827 | return 0; | |
828 | ||
829 | if (buf_size >= ptr->snapshot_size / 2) | |
830 | return 0; | |
831 | ||
832 | return buf_size; | |
833 | } | |
834 | ||
835 | static int intel_pt_snapshot_init(struct intel_pt_recording *ptr, | |
836 | size_t snapshot_buf_size) | |
837 | { | |
838 | if (ptr->snapshot_init_done) | |
839 | return 0; | |
840 | ||
841 | ptr->snapshot_init_done = true; | |
842 | ||
843 | ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr, | |
844 | snapshot_buf_size); | |
845 | ||
846 | return 0; | |
847 | } | |
848 | ||
/**
 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
 * @buf1: first buffer
 * @compare_size: number of bytes to compare
 * @buf2: second buffer (a circular buffer)
 * @offs2: offset in second buffer
 * @buf2_size: size of second buffer
 *
 * The comparison allows for the possibility that the bytes to compare in the
 * circular buffer are not contiguous. It is assumed that @compare_size <=
 * @buf2_size and @offs2 <= @buf2_size. This function returns %false if the
 * bytes are identical, %true otherwise.
 *
 * When the region wraps, the first @buf2_size - @offs2 bytes of @buf1 are
 * compared with the end of @buf2 and the remaining bytes with its start.
 */
static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
				     void *buf2, size_t offs2, size_t buf2_size)
{
	const unsigned char *lin = buf1;
	const unsigned char *ring = buf2;
	size_t end2 = offs2 + compare_size, part_size;

	if (end2 <= buf2_size)
		return memcmp(lin, ring + offs2, compare_size);

	/* Contiguous bytes available from offs2 up to the end of the ring. */
	part_size = buf2_size - offs2;
	if (memcmp(lin, ring + offs2, part_size))
		return true;

	/* The rest wraps around to the start of the ring. */
	compare_size -= part_size;

	return memcmp(lin + part_size, ring, compare_size);
}
878 | ||
/*
 * Decide whether the trace buffer differs from the saved reference copy.
 *
 * The reference region is @ref_size bytes starting at @ref_offset in the
 * circular buffer @data of @buf_size bytes.  If @head lies inside that region
 * the answer is true without comparing; otherwise the region is compared
 * with @ref_buf and true is returned if the bytes differ.
 */
static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
				 size_t ref_size, size_t buf_size,
				 void *data, size_t head)
{
	const size_t ref_end = ref_offset + ref_size;
	bool head_in_ref;

	if (ref_end > buf_size)
		/* Region wraps past the end of the buffer. */
		head_in_ref = head > ref_offset || head < ref_end - buf_size;
	else
		head_in_ref = head > ref_offset && head < ref_end;

	if (head_in_ref)
		return true;

	return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
					buf_size);
}
895 | ||
/*
 * Save @ref_size bytes of the circular buffer @data (size @buf_size) that
 * precede @head into @ref_buf.
 *
 * If at least @ref_size bytes lie before @head they are copied as one piece.
 * Otherwise the @head bytes from the start of the buffer are stored first,
 * followed by the remaining bytes taken from the end of the buffer.
 */
static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
			      void *data, size_t head)
{
	unsigned char *dst = ref_buf;
	const unsigned char *src = data;
	size_t tail;

	if (head >= ref_size) {
		memcpy(dst, src + (head - ref_size), ref_size);
		return;
	}

	tail = ref_size - head;
	memcpy(dst, src, head);
	memcpy(dst + head, src + (buf_size - tail), tail);
}
907 | ||
908 | static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx, | |
909 | struct auxtrace_mmap *mm, unsigned char *data, | |
910 | u64 head) | |
911 | { | |
912 | struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx]; | |
913 | bool wrapped; | |
914 | ||
915 | wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset, | |
916 | ptr->snapshot_ref_buf_size, mm->len, | |
917 | data, head); | |
918 | ||
919 | intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len, | |
920 | data, head); | |
921 | ||
922 | return wrapped; | |
923 | } | |
924 | ||
/*
 * Report whether any of the last 512 64-bit words of the buffer (or all of
 * them, if the buffer is shorter than that) is non-zero.  @buf_size is in
 * bytes.
 */
static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
{
	int words = buf_size >> 3;
	int first = words > 512 ? words - 512 : 0;
	int idx = first;

	while (idx < words) {
		if (data[idx] != 0)
			return true;
		idx++;
	}

	return false;
}
941 | ||
942 | static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx, | |
943 | struct auxtrace_mmap *mm, unsigned char *data, | |
944 | u64 *head, u64 *old) | |
945 | { | |
946 | struct intel_pt_recording *ptr = | |
947 | container_of(itr, struct intel_pt_recording, itr); | |
948 | bool wrapped; | |
949 | int err; | |
950 | ||
951 | pr_debug3("%s: mmap index %d old head %zu new head %zu\n", | |
952 | __func__, idx, (size_t)*old, (size_t)*head); | |
953 | ||
954 | err = intel_pt_snapshot_init(ptr, mm->len); | |
955 | if (err) | |
956 | goto out_err; | |
957 | ||
958 | if (idx >= ptr->snapshot_ref_cnt) { | |
959 | err = intel_pt_alloc_snapshot_refs(ptr, idx); | |
960 | if (err) | |
961 | goto out_err; | |
962 | } | |
963 | ||
964 | if (ptr->snapshot_ref_buf_size) { | |
965 | if (!ptr->snapshot_refs[idx].ref_buf) { | |
966 | err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len); | |
967 | if (err) | |
968 | goto out_err; | |
969 | } | |
970 | wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head); | |
971 | } else { | |
972 | wrapped = ptr->snapshot_refs[idx].wrapped; | |
973 | if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) { | |
974 | ptr->snapshot_refs[idx].wrapped = true; | |
975 | wrapped = true; | |
976 | } | |
977 | } | |
978 | ||
979 | /* | |
980 | * In full trace mode 'head' continually increases. However in snapshot | |
981 | * mode 'head' is an offset within the buffer. Here 'old' and 'head' | |
982 | * are adjusted to match the full trace case which expects that 'old' is | |
983 | * always less than 'head'. | |
984 | */ | |
985 | if (wrapped) { | |
986 | *old = *head; | |
987 | *head += mm->len; | |
988 | } else { | |
989 | if (mm->mask) | |
990 | *old &= mm->mask; | |
991 | else | |
992 | *old %= mm->len; | |
993 | if (*old > *head) | |
994 | *head += mm->len; | |
995 | } | |
996 | ||
997 | pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", | |
998 | __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); | |
999 | ||
1000 | return 0; | |
1001 | ||
1002 | out_err: | |
1003 | pr_err("%s: failed, error %d\n", __func__, err); | |
1004 | return err; | |
1005 | } | |
1006 | ||
1007 | static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) | |
1008 | { | |
1009 | return rdtsc(); | |
1010 | } | |
1011 | ||
1012 | static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) | |
1013 | { | |
1014 | struct intel_pt_recording *ptr = | |
1015 | container_of(itr, struct intel_pt_recording, itr); | |
1016 | struct perf_evsel *evsel; | |
1017 | ||
e5cadb93 | 1018 | evlist__for_each_entry(ptr->evlist, evsel) { |
90e457f7 AH |
1019 | if (evsel->attr.type == ptr->intel_pt_pmu->type) |
1020 | return perf_evlist__enable_event_idx(ptr->evlist, evsel, | |
1021 | idx); | |
1022 | } | |
1023 | return -EINVAL; | |
1024 | } | |
1025 | ||
1026 | struct auxtrace_record *intel_pt_recording_init(int *err) | |
1027 | { | |
1028 | struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); | |
1029 | struct intel_pt_recording *ptr; | |
1030 | ||
1031 | if (!intel_pt_pmu) | |
1032 | return NULL; | |
1033 | ||
bd0c7a54 AH |
1034 | if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { |
1035 | *err = -errno; | |
1036 | return NULL; | |
1037 | } | |
1038 | ||
90e457f7 AH |
1039 | ptr = zalloc(sizeof(struct intel_pt_recording)); |
1040 | if (!ptr) { | |
1041 | *err = -ENOMEM; | |
1042 | return NULL; | |
1043 | } | |
1044 | ||
1045 | ptr->intel_pt_pmu = intel_pt_pmu; | |
1046 | ptr->itr.recording_options = intel_pt_recording_options; | |
1047 | ptr->itr.info_priv_size = intel_pt_info_priv_size; | |
1048 | ptr->itr.info_fill = intel_pt_info_fill; | |
1049 | ptr->itr.free = intel_pt_recording_free; | |
1050 | ptr->itr.snapshot_start = intel_pt_snapshot_start; | |
1051 | ptr->itr.snapshot_finish = intel_pt_snapshot_finish; | |
1052 | ptr->itr.find_snapshot = intel_pt_find_snapshot; | |
1053 | ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; | |
1054 | ptr->itr.reference = intel_pt_reference; | |
1055 | ptr->itr.read_finish = intel_pt_read_finish; | |
1056 | return &ptr->itr; | |
1057 | } |