Commit | Line | Data |
---|---|---|
2025cf9e | 1 | // SPDX-License-Identifier: GPL-2.0-only |
90e457f7 AH |
2 | /* |
3 | * intel_pt.c: Intel Processor Trace support | |
4 | * Copyright (c) 2013-2015, Intel Corporation. | |
90e457f7 AH |
5 | */ |
6 | ||
a43783ae | 7 | #include <errno.h> |
90e457f7 AH |
8 | #include <stdbool.h> |
9 | #include <linux/kernel.h> | |
10 | #include <linux/types.h> | |
11 | #include <linux/bitops.h> | |
12 | #include <linux/log2.h> | |
7f7c536f | 13 | #include <linux/zalloc.h> |
11fa7cb8 | 14 | #include <cpuid.h> |
90e457f7 | 15 | |
441b62ac IR |
16 | #include "../../../util/session.h" |
17 | #include "../../../util/event.h" | |
18 | #include "../../../util/evlist.h" | |
19 | #include "../../../util/evsel.h" | |
20 | #include "../../../util/evsel_config.h" | |
21 | #include "../../../util/cpumap.h" | |
22 | #include "../../../util/mmap.h" | |
4b6ab94e | 23 | #include <subcmd/parse-options.h> |
441b62ac IR |
24 | #include "../../../util/parse-events.h" |
25 | #include "../../../util/pmu.h" | |
26 | #include "../../../util/debug.h" | |
27 | #include "../../../util/auxtrace.h" | |
28 | #include "../../../util/record.h" | |
29 | #include "../../../util/target.h" | |
30 | #include "../../../util/tsc.h" | |
20f2be1d | 31 | #include <internal/lib.h> // page_size |
441b62ac | 32 | #include "../../../util/intel-pt.h" |
90e457f7 AH |
33 | |
34 | #define KiB(x) ((x) * 1024) | |
35 | #define MiB(x) ((x) * 1024 * 1024) | |
36 | #define KiB_MASK(x) (KiB(x) - 1) | |
37 | #define MiB_MASK(x) (MiB(x) - 1) | |
38 | ||
90e457f7 AH |
39 | #define INTEL_PT_PSB_PERIOD_NEAR 256 |
40 | ||
/*
 * One entry of the intel_pt_recording.snapshot_refs array.
 *
 * ref_buf is released with zfree() by intel_pt_free_snapshot_refs().
 * NOTE(review): the exact use of ref_offset and wrapped is not visible in this
 * part of the file - confirm against the snapshot lookup code.
 */
struct intel_pt_snapshot_ref {
	void *ref_buf;
	size_t ref_offset;
	bool wrapped;
};
46 | ||
/*
 * Recording-side state for Intel PT.
 *
 * The auxtrace_record is embedded so that callbacks receiving a
 * 'struct auxtrace_record *' can recover this structure with container_of().
 */
struct intel_pt_recording {
	struct auxtrace_record		itr;
	struct perf_pmu			*intel_pt_pmu;
	/* 0 = none; set to 1, 2 or 3 by intel_pt_recording_options() */
	int				have_sched_switch;
	/* Set by intel_pt_recording_options() */
	struct evlist		*evlist;
	/* Copy of opts->auxtrace_snapshot_mode */
	bool				snapshot_mode;
	bool				snapshot_init_done;
	/* Size parsed by intel_pt_parse_snapshot_options() (0 if none given) */
	size_t				snapshot_size;
	size_t				snapshot_ref_buf_size;
	/* Number of entries allocated in snapshot_refs */
	int				snapshot_ref_cnt;
	struct intel_pt_snapshot_ref	*snapshot_refs;
	/* Size computed by intel_pt_info_priv_size() */
	size_t				priv_size;
};
60 | ||
61 | static int intel_pt_parse_terms_with_default(struct list_head *formats, | |
62 | const char *str, | |
63 | u64 *config) | |
64 | { | |
65 | struct list_head *terms; | |
66 | struct perf_event_attr attr = { .size = 0, }; | |
67 | int err; | |
68 | ||
69 | terms = malloc(sizeof(struct list_head)); | |
70 | if (!terms) | |
71 | return -ENOMEM; | |
72 | ||
73 | INIT_LIST_HEAD(terms); | |
74 | ||
75 | err = parse_events_terms(terms, str); | |
76 | if (err) | |
77 | goto out_free; | |
78 | ||
79 | attr.config = *config; | |
80 | err = perf_pmu__config_terms(formats, &attr, terms, true, NULL); | |
81 | if (err) | |
82 | goto out_free; | |
83 | ||
84 | *config = attr.config; | |
85 | out_free: | |
2146afc6 | 86 | parse_events_terms__delete(terms); |
90e457f7 AH |
87 | return err; |
88 | } | |
89 | ||
/*
 * Parse the config term string @str against @formats starting from an all-zero
 * config, so that on success *@config holds only the bits selected by @str.
 * Returns the result of intel_pt_parse_terms_with_default().
 */
static int intel_pt_parse_terms(struct list_head *formats, const char *str,
				u64 *config)
{
	*config = 0;
	return intel_pt_parse_terms_with_default(formats, str, config);
}
96 | ||
/*
 * Gather the bits of @bits selected by @mask and pack them together at the
 * low end of the result, preserving their relative order (the highest selected
 * bit ends up as the most significant bit of the result).
 */
static u64 intel_pt_masked_bits(u64 mask, u64 bits)
{
	u64 packed = 0;
	int bit;

	/* Walk from the most significant bit down so order is preserved */
	for (bit = 63; bit >= 0; bit--) {
		const u64 sel = 1ULL << bit;

		if (!(mask & sel))
			continue;

		packed <<= 1;
		if (bits & sel)
			packed |= 1;
	}

	return packed;
}
115 | ||
116 | static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, | |
63503dba | 117 | struct evlist *evlist, u64 *res) |
bc9b6bf0 | 118 | { |
32dcd021 | 119 | struct evsel *evsel; |
bc9b6bf0 AH |
120 | u64 mask; |
121 | ||
122 | *res = 0; | |
123 | ||
124 | mask = perf_pmu__format_bits(&intel_pt_pmu->format, str); | |
125 | if (!mask) | |
126 | return -EINVAL; | |
127 | ||
e5cadb93 | 128 | evlist__for_each_entry(evlist, evsel) { |
1fc632ce JO |
129 | if (evsel->core.attr.type == intel_pt_pmu->type) { |
130 | *res = intel_pt_masked_bits(mask, evsel->core.attr.config); | |
bc9b6bf0 AH |
131 | return 0; |
132 | } | |
133 | } | |
134 | ||
135 | return -EINVAL; | |
136 | } | |
137 | ||
138 | static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu, | |
63503dba | 139 | struct evlist *evlist) |
bc9b6bf0 AH |
140 | { |
141 | u64 val; | |
142 | int err, topa_multiple_entries; | |
143 | size_t psb_period; | |
144 | ||
145 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries", | |
146 | "%d", &topa_multiple_entries) != 1) | |
147 | topa_multiple_entries = 0; | |
148 | ||
149 | /* | |
150 | * Use caps/topa_multiple_entries to indicate early hardware that had | |
151 | * extra frequent PSBs. | |
152 | */ | |
153 | if (!topa_multiple_entries) { | |
154 | psb_period = 256; | |
155 | goto out; | |
156 | } | |
157 | ||
158 | err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val); | |
159 | if (err) | |
160 | val = 0; | |
161 | ||
162 | psb_period = 1 << (val + 11); | |
163 | out: | |
164 | pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period); | |
165 | return psb_period; | |
166 | } | |
167 | ||
/*
 * Choose a set bit of @bits close to position @target: the highest set bit at
 * or below @target if there is one, otherwise the lowest set bit above it.
 * Returns the bit position, or -1 if @bits is zero.
 */
static int intel_pt_pick_bit(int bits, int target)
{
	int best = -1;
	int idx = 0;

	while (bits) {
		if (bits & 1) {
			if (best < 0 || idx <= target)
				best = idx;
			/* Nothing beyond the target can be a better choice */
			if (idx >= target)
				return best;
		}
		bits >>= 1;
		idx++;
	}

	return best;
}
183 | ||
184 | static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) | |
185 | { | |
bc9b6bf0 | 186 | char buf[256]; |
b45fc0bf | 187 | int mtc, mtc_periods = 0, mtc_period; |
bc9b6bf0 AH |
188 | int psb_cyc, psb_periods, psb_period; |
189 | int pos = 0; | |
90e457f7 | 190 | u64 config; |
9fd629f9 | 191 | char c; |
90e457f7 | 192 | |
bc9b6bf0 AH |
193 | pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); |
194 | ||
b45fc0bf AH |
195 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d", |
196 | &mtc) != 1) | |
197 | mtc = 1; | |
198 | ||
199 | if (mtc) { | |
200 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x", | |
201 | &mtc_periods) != 1) | |
202 | mtc_periods = 0; | |
203 | if (mtc_periods) { | |
204 | mtc_period = intel_pt_pick_bit(mtc_periods, 3); | |
205 | pos += scnprintf(buf + pos, sizeof(buf) - pos, | |
206 | ",mtc,mtc_period=%d", mtc_period); | |
207 | } | |
208 | } | |
209 | ||
bc9b6bf0 AH |
210 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d", |
211 | &psb_cyc) != 1) | |
212 | psb_cyc = 1; | |
213 | ||
b45fc0bf | 214 | if (psb_cyc && mtc_periods) { |
bc9b6bf0 AH |
215 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x", |
216 | &psb_periods) != 1) | |
217 | psb_periods = 0; | |
218 | if (psb_periods) { | |
219 | psb_period = intel_pt_pick_bit(psb_periods, 3); | |
220 | pos += scnprintf(buf + pos, sizeof(buf) - pos, | |
221 | ",psb_period=%d", psb_period); | |
222 | } | |
223 | } | |
224 | ||
9fd629f9 AH |
225 | if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && |
226 | perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1) | |
227 | pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch"); | |
228 | ||
bc9b6bf0 AH |
229 | pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); |
230 | ||
231 | intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); | |
232 | ||
90e457f7 AH |
233 | return config; |
234 | } | |
235 | ||
236 | static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr, | |
237 | struct record_opts *opts, | |
238 | const char *str) | |
239 | { | |
240 | struct intel_pt_recording *ptr = | |
241 | container_of(itr, struct intel_pt_recording, itr); | |
242 | unsigned long long snapshot_size = 0; | |
243 | char *endptr; | |
244 | ||
245 | if (str) { | |
246 | snapshot_size = strtoull(str, &endptr, 0); | |
247 | if (*endptr || snapshot_size > SIZE_MAX) | |
248 | return -1; | |
249 | } | |
250 | ||
251 | opts->auxtrace_snapshot_mode = true; | |
252 | opts->auxtrace_snapshot_size = snapshot_size; | |
253 | ||
254 | ptr->snapshot_size = snapshot_size; | |
255 | ||
256 | return 0; | |
257 | } | |
258 | ||
259 | struct perf_event_attr * | |
260 | intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) | |
261 | { | |
262 | struct perf_event_attr *attr; | |
263 | ||
264 | attr = zalloc(sizeof(struct perf_event_attr)); | |
265 | if (!attr) | |
266 | return NULL; | |
267 | ||
268 | attr->config = intel_pt_default_config(intel_pt_pmu); | |
269 | ||
270 | intel_pt_pmu->selectable = true; | |
271 | ||
272 | return attr; | |
273 | } | |
274 | ||
63503dba | 275 | static const char *intel_pt_find_filter(struct evlist *evlist, |
c093f308 AH |
276 | struct perf_pmu *intel_pt_pmu) |
277 | { | |
32dcd021 | 278 | struct evsel *evsel; |
c093f308 AH |
279 | |
280 | evlist__for_each_entry(evlist, evsel) { | |
1fc632ce | 281 | if (evsel->core.attr.type == intel_pt_pmu->type) |
c093f308 AH |
282 | return evsel->filter; |
283 | } | |
284 | ||
285 | return NULL; | |
286 | } | |
287 | ||
/*
 * Number of bytes needed to store @filter including its terminating NUL,
 * rounded up to a multiple of 8. A NULL or empty filter needs no space.
 */
static size_t intel_pt_filter_bytes(const char *filter)
{
	size_t len;

	if (!filter)
		return 0;

	len = strlen(filter);
	if (!len)
		return 0;

	return roundup(len + 1, 8);
}
294 | ||
14a05e13 | 295 | static size_t |
63503dba | 296 | intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist) |
90e457f7 | 297 | { |
c093f308 AH |
298 | struct intel_pt_recording *ptr = |
299 | container_of(itr, struct intel_pt_recording, itr); | |
300 | const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu); | |
301 | ||
302 | ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) + | |
303 | intel_pt_filter_bytes(filter); | |
304 | ||
305 | return ptr->priv_size; | |
90e457f7 AH |
306 | } |
307 | ||
11fa7cb8 AH |
308 | static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) |
309 | { | |
310 | unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; | |
311 | ||
312 | __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); | |
313 | *n = ebx; | |
314 | *d = eax; | |
315 | } | |
316 | ||
90e457f7 AH |
/*
 * Fill in the Intel PT private data of the auxtrace info event.
 *
 * @priv_size must equal the size previously computed by
 * intel_pt_info_priv_size(), otherwise -EINVAL is returned. -EINVAL is also
 * returned when the session's evlist has no mmaps. An error from
 * perf_read_tsc_conversion() other than -EOPNOTSUPP is passed back to the
 * caller. Returns 0 on success.
 */
static int intel_pt_info_fill(struct auxtrace_record *itr,
			      struct perf_session *session,
			      struct perf_record_auxtrace_info *auxtrace_info,
			      size_t priv_size)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	struct perf_event_mmap_page *pc;
	struct perf_tsc_conversion tc = { .time_mult = 0, };
	bool cap_user_time_zero = false, per_cpu_mmaps;
	u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
	u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
	unsigned long max_non_turbo_ratio;
	size_t filter_str_len;
	const char *filter;
	__u64 *info;
	int err;

	if (priv_size != ptr->priv_size)
		return -EINVAL;

	/*
	 * Look up the config bit(s) of each term. intel_pt_parse_terms() zeroes
	 * its output first, so a term that fails to parse yields 0.
	 */
	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
	intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
			     &noretcomp_bit);
	intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
	mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
					      "mtc_period");
	intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);

	intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);

	if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
				"%lu", &max_non_turbo_ratio) != 1)
		max_non_turbo_ratio = 0;

	filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
	filter_str_len = filter ? strlen(filter) : 0;

	if (!session->evlist->core.nr_mmaps)
		return -EINVAL;

	/* TSC conversion parameters are read from the first mmap's base page */
	pc = session->evlist->mmap[0].core.base;
	if (pc) {
		err = perf_read_tsc_conversion(pc, &tc);
		if (err) {
			/* -EOPNOTSUPP is tolerated: only the warning below is issued */
			if (err != -EOPNOTSUPP)
				return err;
		} else {
			cap_user_time_zero = tc.time_mult != 0;
		}
		if (!cap_user_time_zero)
			ui__warning("Intel Processor Trace: TSC not available\n");
	}

	per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.cpus);

	auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
	auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
	auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
	auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
	auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
	auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
	auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
	auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
	auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
	auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
	auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
	auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
	auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
	auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
	auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
	auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
	auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
	auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;

	/* The filter string is stored right after the FILTER_STR_LEN entry */
	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;

	if (filter_str_len) {
		size_t len = intel_pt_filter_bytes(filter);

		/*
		 * len is the padded size (string + NUL rounded up to 8), so
		 * strncpy() also zero-fills the padding after the string.
		 */
		strncpy((char *)info, filter, len);
		/* Step over the string in u64 units; info is not used afterwards here */
		info += len >> 3;
	}

	return 0;
}
404 | ||
63503dba | 405 | static int intel_pt_track_switches(struct evlist *evlist) |
90e457f7 AH |
406 | { |
407 | const char *sched_switch = "sched:sched_switch"; | |
32dcd021 | 408 | struct evsel *evsel; |
90e457f7 AH |
409 | int err; |
410 | ||
411 | if (!perf_evlist__can_select_event(evlist, sched_switch)) | |
412 | return -EPERM; | |
413 | ||
414 | err = parse_events(evlist, sched_switch, NULL); | |
415 | if (err) { | |
416 | pr_debug2("%s: failed to parse %s, error %d\n", | |
417 | __func__, sched_switch, err); | |
418 | return err; | |
419 | } | |
420 | ||
515dbe48 | 421 | evsel = evlist__last(evlist); |
90e457f7 AH |
422 | |
423 | perf_evsel__set_sample_bit(evsel, CPU); | |
424 | perf_evsel__set_sample_bit(evsel, TIME); | |
425 | ||
648b5af3 | 426 | evsel->core.system_wide = true; |
90e457f7 AH |
427 | evsel->no_aux_samples = true; |
428 | evsel->immediate = true; | |
429 | ||
430 | return 0; | |
431 | } | |
432 | ||
bc9b6bf0 AH |
/*
 * Write the positions of the set bits of @valid into @str (at most @len bytes)
 * as a readable list, e.g. "0,2-5". Isolated values are comma separated, a
 * run of exactly two values is written "a,b", and a run of three or more is
 * written "first-last".
 *
 * 'state' tracks where we are relative to a run of consecutive set bits:
 *   1 - nothing has been written yet
 *   0 - a gap after earlier output (the next value needs a leading comma)
 *   2 - the first value of a run has been written
 *   3 - the run has two values so far
 *   4 - the run has three or more values
 */
static void intel_pt_valid_str(char *str, size_t len, u64 valid)
{
	unsigned int val, last = 0, state = 1;
	int p = 0;

	str[0] = '\0';

	/*
	 * Iterate one position past bit 63: by then valid is 0, so a run that
	 * ends at the top bit is still flushed by the else branch.
	 */
	for (val = 0; val <= 64; val++, valid >>= 1) {
		if (valid & 1) {
			last = val;
			switch (state) {
			case 0:
				p += scnprintf(str + p, len - p, ",");
				/* Fall through */
			case 1:
				p += scnprintf(str + p, len - p, "%u", val);
				state = 2;
				break;
			case 2:
				state = 3;
				break;
			case 3:
				state = 4;
				break;
			default:
				break;
			}
		} else {
			/* End of a run: emit its last value if not already written */
			switch (state) {
			case 3:
				p += scnprintf(str + p, len - p, ",%u", last);
				state = 0;
				break;
			case 4:
				p += scnprintf(str + p, len - p, "-%u", last);
				state = 0;
				break;
			default:
				break;
			}
			/* Stay in state 1 until the first value has been written */
			if (state != 1)
				state = 0;
		}
	}
}
478 | ||
479 | static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, | |
480 | const char *caps, const char *name, | |
481 | const char *supported, u64 config) | |
482 | { | |
483 | char valid_str[256]; | |
484 | unsigned int shift; | |
485 | unsigned long long valid; | |
486 | u64 bits; | |
487 | int ok; | |
488 | ||
489 | if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1) | |
490 | valid = 0; | |
491 | ||
492 | if (supported && | |
493 | perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok) | |
494 | valid = 0; | |
495 | ||
496 | valid |= 1; | |
497 | ||
498 | bits = perf_pmu__format_bits(&intel_pt_pmu->format, name); | |
499 | ||
500 | config &= bits; | |
501 | ||
502 | for (shift = 0; bits && !(bits & 1); shift++) | |
503 | bits >>= 1; | |
504 | ||
505 | config >>= shift; | |
506 | ||
507 | if (config > 63) | |
508 | goto out_err; | |
509 | ||
510 | if (valid & (1 << config)) | |
511 | return 0; | |
512 | out_err: | |
513 | intel_pt_valid_str(valid_str, sizeof(valid_str), valid); | |
514 | pr_err("Invalid %s for %s. Valid values are: %s\n", | |
515 | name, INTEL_PT_PMU_NAME, valid_str); | |
516 | return -EINVAL; | |
517 | } | |
518 | ||
519 | static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, | |
32dcd021 | 520 | struct evsel *evsel) |
bc9b6bf0 | 521 | { |
b45fc0bf | 522 | int err; |
1c6f709b | 523 | char c; |
b45fc0bf | 524 | |
bc9b6bf0 AH |
525 | if (!evsel) |
526 | return 0; | |
527 | ||
1c6f709b AH |
528 | /* |
529 | * If supported, force pass-through config term (pt=1) even if user | |
530 | * sets pt=0, which avoids senseless kernel errors. | |
531 | */ | |
532 | if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 && | |
1fc632ce | 533 | !(evsel->core.attr.config & 1)) { |
1c6f709b | 534 | pr_warning("pt=0 doesn't make sense, forcing pt=1\n"); |
1fc632ce | 535 | evsel->core.attr.config |= 1; |
1c6f709b AH |
536 | } |
537 | ||
0de802ab AH |
538 | err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", |
539 | "cyc_thresh", "caps/psb_cyc", | |
1fc632ce | 540 | evsel->core.attr.config); |
0de802ab AH |
541 | if (err) |
542 | return err; | |
543 | ||
b45fc0bf AH |
544 | err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods", |
545 | "mtc_period", "caps/mtc", | |
1fc632ce | 546 | evsel->core.attr.config); |
b45fc0bf AH |
547 | if (err) |
548 | return err; | |
549 | ||
bc9b6bf0 AH |
550 | return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods", |
551 | "psb_period", "caps/psb_cyc", | |
1fc632ce | 552 | evsel->core.attr.config); |
bc9b6bf0 AH |
553 | } |
554 | ||
c4ab2f0f AH |
555 | static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu, |
556 | struct evsel *evsel) | |
557 | { | |
558 | struct perf_evsel_config_term *term; | |
559 | u64 user_bits = 0, bits; | |
560 | ||
561 | term = perf_evsel__get_config_term(evsel, CFG_CHG); | |
562 | if (term) | |
563 | user_bits = term->val.cfg_chg; | |
564 | ||
565 | bits = perf_pmu__format_bits(&intel_pt_pmu->format, "psb_period"); | |
566 | ||
567 | /* Did user change psb_period */ | |
568 | if (bits & user_bits) | |
569 | return; | |
570 | ||
571 | /* Set psb_period to 0 */ | |
572 | evsel->core.attr.config &= ~bits; | |
573 | } | |
574 | ||
575 | static void intel_pt_min_max_sample_sz(struct evlist *evlist, | |
576 | size_t *min_sz, size_t *max_sz) | |
577 | { | |
578 | struct evsel *evsel; | |
579 | ||
580 | evlist__for_each_entry(evlist, evsel) { | |
581 | size_t sz = evsel->core.attr.aux_sample_size; | |
582 | ||
583 | if (!sz) | |
584 | continue; | |
585 | if (min_sz && (sz < *min_sz || !*min_sz)) | |
586 | *min_sz = sz; | |
587 | if (max_sz && sz > *max_sz) | |
588 | *max_sz = sz; | |
589 | } | |
590 | } | |
591 | ||
9e64cefe AH |
592 | /* |
593 | * Currently, there is not enough information to disambiguate different PEBS | |
594 | * events, so only allow one. | |
595 | */ | |
596 | static bool intel_pt_too_many_aux_output(struct evlist *evlist) | |
597 | { | |
598 | struct evsel *evsel; | |
599 | int aux_output_cnt = 0; | |
600 | ||
601 | evlist__for_each_entry(evlist, evsel) | |
602 | aux_output_cnt += !!evsel->core.attr.aux_output; | |
603 | ||
604 | if (aux_output_cnt > 1) { | |
605 | pr_err(INTEL_PT_PMU_NAME " supports at most one event with aux-output\n"); | |
606 | return true; | |
607 | } | |
608 | ||
609 | return false; | |
610 | } | |
611 | ||
/*
 * Validate and complete the recording options for an Intel PT session.
 *
 * Locates the (single) Intel PT event in @evlist, rejects unsupported option
 * combinations, validates the event's config, fills in default buffer sizes
 * for snapshot, sample and full-trace modes, and adds the extra events needed
 * to follow context switches and to keep tracking.
 *
 * Returns 0 on success (including when there is no Intel PT event and nothing
 * needs doing) or a negative error code.
 */
static int intel_pt_recording_options(struct auxtrace_record *itr,
				      struct evlist *evlist,
				      struct record_opts *opts)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	bool have_timing_info, need_immediate = false;
	struct evsel *evsel, *intel_pt_evsel = NULL;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	bool privileged = perf_event_paranoid_check(-1);
	u64 tsc_bit;
	int err;

	ptr->evlist = evlist;
	ptr->snapshot_mode = opts->auxtrace_snapshot_mode;

	/* Find the Intel PT event; finding one turns on full_auxtrace */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == intel_pt_pmu->type) {
			if (intel_pt_evsel) {
				pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
				return -EINVAL;
			}
			evsel->core.attr.freq = 0;
			evsel->core.attr.sample_period = 1;
			intel_pt_evsel = evsel;
			opts->full_auxtrace = true;
		}
	}

	if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
		pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
		return -EINVAL;
	}

	if (opts->auxtrace_snapshot_mode && opts->auxtrace_sample_mode) {
		pr_err("Snapshot mode (" INTEL_PT_PMU_NAME " PMU) and sample trace cannot be used together\n");
		return -EINVAL;
	}

	if (opts->use_clockid) {
		pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
		return -EINVAL;
	}

	if (intel_pt_too_many_aux_output(evlist))
		return -EINVAL;

	/* Nothing more to do unless AUX area tracing is in use */
	if (!opts->full_auxtrace)
		return 0;

	if (opts->auxtrace_sample_mode)
		intel_pt_config_sample_mode(intel_pt_pmu, intel_pt_evsel);

	err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
	if (err)
		return err;

	/* Set default sizes for snapshot mode */
	if (opts->auxtrace_snapshot_mode) {
		size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);

		if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
			if (privileged) {
				opts->auxtrace_mmap_pages = MiB(4) / page_size;
			} else {
				opts->auxtrace_mmap_pages = KiB(128) / page_size;
				if (opts->mmap_pages == UINT_MAX)
					opts->mmap_pages = KiB(256) / page_size;
			}
		} else if (!opts->auxtrace_mmap_pages && !privileged &&
			   opts->mmap_pages == UINT_MAX) {
			opts->mmap_pages = KiB(256) / page_size;
		}
		/* Derive whichever of snapshot size / mmap pages was not given */
		if (!opts->auxtrace_snapshot_size)
			opts->auxtrace_snapshot_size =
				opts->auxtrace_mmap_pages * (size_t)page_size;
		if (!opts->auxtrace_mmap_pages) {
			size_t sz = opts->auxtrace_snapshot_size;

			sz = round_up(sz, page_size) / page_size;
			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
		}
		if (opts->auxtrace_snapshot_size >
				opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}
		pr_debug2("Intel PT snapshot size: %zu\n",
			  opts->auxtrace_snapshot_size);
		if (psb_period &&
		    opts->auxtrace_snapshot_size <= psb_period +
						  INTEL_PT_PSB_PERIOD_NEAR)
			ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
				    opts->auxtrace_snapshot_size, psb_period);
	}

	/* Set default sizes for sample mode */
	if (opts->auxtrace_sample_mode) {
		size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
		size_t min_sz = 0, max_sz = 0;

		intel_pt_min_max_sample_sz(evlist, &min_sz, &max_sz);
		if (!opts->auxtrace_mmap_pages && !privileged &&
		    opts->mmap_pages == UINT_MAX)
			opts->mmap_pages = KiB(256) / page_size;
		if (!opts->auxtrace_mmap_pages) {
			size_t sz = round_up(max_sz, page_size) / page_size;

			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
		}
		if (max_sz > opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Sample size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       max_sz,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}
		pr_debug2("Intel PT min. sample size: %zu max. sample size: %zu\n",
			  min_sz, max_sz);
		if (psb_period &&
		    min_sz <= psb_period + INTEL_PT_PSB_PERIOD_NEAR)
			ui__warning("Intel PT sample size (%zu) may be too small for PSB period (%zu)\n",
				    min_sz, psb_period);
	}

	/* Set default sizes for full trace mode */
	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz;

		if (opts->auxtrace_snapshot_mode || opts->auxtrace_sample_mode)
			min_sz = KiB(4);
		else
			min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);

	/* Timing information is available only if the tsc term is enabled */
	if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit))
		have_timing_info = true;
	else
		have_timing_info = false;

	/*
	 * Per-cpu recording needs sched_switch events to distinguish different
	 * threads.
	 *
	 * have_sched_switch records how switches are tracked:
	 *   1 - via a sched:sched_switch event (intel_pt_track_switches())
	 *   2 - via context switch events recorded by perf (not cpu-wide)
	 *   3 - via cpu-wide context switch events
	 */
	if (have_timing_info && !perf_cpu_map__empty(cpus)) {
		if (perf_can_record_switch_events()) {
			bool cpu_wide = !target__none(&opts->target) &&
					!target__has_task(&opts->target);

			if (!cpu_wide && perf_can_record_cpu_wide()) {
				struct evsel *switch_evsel;

				/* Add a system-wide dummy event to collect context switches */
				err = parse_events(evlist, "dummy:u", NULL);
				if (err)
					return err;

				switch_evsel = evlist__last(evlist);

				switch_evsel->core.attr.freq = 0;
				switch_evsel->core.attr.sample_period = 1;
				switch_evsel->core.attr.context_switch = 1;

				switch_evsel->core.system_wide = true;
				switch_evsel->no_aux_samples = true;
				switch_evsel->immediate = true;

				perf_evsel__set_sample_bit(switch_evsel, TID);
				perf_evsel__set_sample_bit(switch_evsel, TIME);
				perf_evsel__set_sample_bit(switch_evsel, CPU);
				perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);

				opts->record_switch_events = false;
				ptr->have_sched_switch = 3;
			} else {
				opts->record_switch_events = true;
				need_immediate = true;
				if (cpu_wide)
					ptr->have_sched_switch = 3;
				else
					ptr->have_sched_switch = 2;
			}
		} else {
			err = intel_pt_track_switches(evlist);
			/* -EPERM is not fatal: recording continues without switch tracking */
			if (err == -EPERM)
				pr_debug2("Unable to select sched:sched_switch\n");
			else if (err)
				return err;
			else
				ptr->have_sched_switch = 1;
		}
	}

	if (intel_pt_evsel) {
		/*
		 * To obtain the auxtrace buffer file descriptor, the auxtrace
		 * event must come first.
		 */
		perf_evlist__to_front(evlist, intel_pt_evsel);
		/*
		 * In the case of per-cpu mmaps, we need the CPU on the
		 * AUX event.
		 */
		if (!perf_cpu_map__empty(cpus))
			perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
	}

	/* Add dummy event to keep tracking */
	if (opts->full_auxtrace) {
		struct evsel *tracking_evsel;

		err = parse_events(evlist, "dummy:u", NULL);
		if (err)
			return err;

		tracking_evsel = evlist__last(evlist);

		perf_evlist__set_tracking_event(evlist, tracking_evsel);

		tracking_evsel->core.attr.freq = 0;
		tracking_evsel->core.attr.sample_period = 1;

		tracking_evsel->no_aux_samples = true;
		if (need_immediate)
			tracking_evsel->immediate = true;

		/* In per-cpu case, always need the time of mmap events etc */
		if (!perf_cpu_map__empty(cpus)) {
			perf_evsel__set_sample_bit(tracking_evsel, TIME);
			/* And the CPU for switch events */
			perf_evsel__set_sample_bit(tracking_evsel, CPU);
		}
		perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
	}

	/*
	 * Warn the user when we do not have enough information to decode i.e.
	 * per-cpu with no sched_switch (except workload-only).
	 */
	if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
	    !target__none(&opts->target))
		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");

	return 0;
}
882 | ||
883 | static int intel_pt_snapshot_start(struct auxtrace_record *itr) | |
884 | { | |
885 | struct intel_pt_recording *ptr = | |
886 | container_of(itr, struct intel_pt_recording, itr); | |
32dcd021 | 887 | struct evsel *evsel; |
90e457f7 | 888 | |
e5cadb93 | 889 | evlist__for_each_entry(ptr->evlist, evsel) { |
1fc632ce | 890 | if (evsel->core.attr.type == ptr->intel_pt_pmu->type) |
9a10bb22 | 891 | return evsel__disable(evsel); |
90e457f7 AH |
892 | } |
893 | return -EINVAL; | |
894 | } | |
895 | ||
896 | static int intel_pt_snapshot_finish(struct auxtrace_record *itr) | |
897 | { | |
898 | struct intel_pt_recording *ptr = | |
899 | container_of(itr, struct intel_pt_recording, itr); | |
32dcd021 | 900 | struct evsel *evsel; |
90e457f7 | 901 | |
e5cadb93 | 902 | evlist__for_each_entry(ptr->evlist, evsel) { |
1fc632ce | 903 | if (evsel->core.attr.type == ptr->intel_pt_pmu->type) |
ec7f24ef | 904 | return evsel__enable(evsel); |
90e457f7 AH |
905 | } |
906 | return -EINVAL; | |
907 | } | |
908 | ||
909 | static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx) | |
910 | { | |
911 | const size_t sz = sizeof(struct intel_pt_snapshot_ref); | |
912 | int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2; | |
913 | struct intel_pt_snapshot_ref *refs; | |
914 | ||
915 | if (!new_cnt) | |
916 | new_cnt = 16; | |
917 | ||
918 | while (new_cnt <= idx) | |
919 | new_cnt *= 2; | |
920 | ||
921 | refs = calloc(new_cnt, sz); | |
922 | if (!refs) | |
923 | return -ENOMEM; | |
924 | ||
925 | memcpy(refs, ptr->snapshot_refs, cnt * sz); | |
926 | ||
927 | ptr->snapshot_refs = refs; | |
928 | ptr->snapshot_ref_cnt = new_cnt; | |
929 | ||
930 | return 0; | |
931 | } | |
932 | ||
933 | static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr) | |
934 | { | |
935 | int i; | |
936 | ||
937 | for (i = 0; i < ptr->snapshot_ref_cnt; i++) | |
938 | zfree(&ptr->snapshot_refs[i].ref_buf); | |
939 | zfree(&ptr->snapshot_refs); | |
940 | } | |
941 | ||
942 | static void intel_pt_recording_free(struct auxtrace_record *itr) | |
943 | { | |
944 | struct intel_pt_recording *ptr = | |
945 | container_of(itr, struct intel_pt_recording, itr); | |
946 | ||
947 | intel_pt_free_snapshot_refs(ptr); | |
948 | free(ptr); | |
949 | } | |
950 | ||
951 | static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx, | |
952 | size_t snapshot_buf_size) | |
953 | { | |
954 | size_t ref_buf_size = ptr->snapshot_ref_buf_size; | |
955 | void *ref_buf; | |
956 | ||
957 | ref_buf = zalloc(ref_buf_size); | |
958 | if (!ref_buf) | |
959 | return -ENOMEM; | |
960 | ||
961 | ptr->snapshot_refs[idx].ref_buf = ref_buf; | |
962 | ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size; | |
963 | ||
964 | return 0; | |
965 | } | |
966 | ||
967 | static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr, | |
968 | size_t snapshot_buf_size) | |
969 | { | |
970 | const size_t max_size = 256 * 1024; | |
971 | size_t buf_size = 0, psb_period; | |
972 | ||
973 | if (ptr->snapshot_size <= 64 * 1024) | |
974 | return 0; | |
975 | ||
976 | psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist); | |
977 | if (psb_period) | |
978 | buf_size = psb_period * 2; | |
979 | ||
980 | if (!buf_size || buf_size > max_size) | |
981 | buf_size = max_size; | |
982 | ||
983 | if (buf_size >= snapshot_buf_size) | |
984 | return 0; | |
985 | ||
986 | if (buf_size >= ptr->snapshot_size / 2) | |
987 | return 0; | |
988 | ||
989 | return buf_size; | |
990 | } | |
991 | ||
992 | static int intel_pt_snapshot_init(struct intel_pt_recording *ptr, | |
993 | size_t snapshot_buf_size) | |
994 | { | |
995 | if (ptr->snapshot_init_done) | |
996 | return 0; | |
997 | ||
998 | ptr->snapshot_init_done = true; | |
999 | ||
1000 | ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr, | |
1001 | snapshot_buf_size); | |
1002 | ||
1003 | return 0; | |
1004 | } | |
1005 | ||
/**
 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
 * @buf1: first buffer
 * @compare_size: number of bytes to compare
 * @buf2: second buffer (a circular buffer)
 * @offs2: offset in second buffer
 * @buf2_size: size of second buffer
 *
 * The comparison allows for the possibility that the bytes to compare in the
 * circular buffer are not contiguous. It is assumed that @compare_size <=
 * @buf2_size and that @offs2 < @buf2_size. When the bytes wrap, the first
 * @buf2_size - @offs2 bytes of @buf1 are compared with the end of @buf2 and
 * the remaining bytes with the start of @buf2. This function returns %false
 * if the bytes are identical, %true otherwise.
 */
static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
				     void *buf2, size_t offs2, size_t buf2_size)
{
	const char *linear = buf1, *ring = buf2;
	size_t part_size;

	if (offs2 + compare_size <= buf2_size)
		return memcmp(linear, ring + offs2, compare_size) != 0;

	/* Bytes available between @offs2 and the end of the circular buffer */
	part_size = buf2_size - offs2;
	if (memcmp(linear, ring + offs2, part_size))
		return true;

	/* The rest has wrapped round to the start of the circular buffer */
	return memcmp(linear + part_size, ring, compare_size - part_size) != 0;
}
1035 | ||
/*
 * Compare the reference buffer with the @ref_size bytes at @ref_offset in the
 * circular buffer @data of @buf_size bytes.
 *
 * Returns true without comparing any bytes when @head lies strictly inside
 * the reference region (taking wrap-around of that region into account).
 * Otherwise returns the result of intel_pt_compare_buffers().
 */
static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
				 size_t ref_size, size_t buf_size,
				 void *data, size_t head)
{
	const size_t ref_end = ref_offset + ref_size;
	bool head_in_ref;

	if (ref_end > buf_size)
		/* Region wraps: it covers the buffer end and the buffer start */
		head_in_ref = head > ref_offset || head < ref_end - buf_size;
	else
		head_in_ref = head > ref_offset && head < ref_end;

	if (head_in_ref)
		return true;

	return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
					buf_size);
}
1052 | ||
/*
 * Fill @ref_buf with @ref_size bytes taken from the circular buffer @data of
 * @buf_size bytes, relative to @head.
 *
 * If at least @ref_size bytes precede @head, those bytes are copied as one
 * piece. Otherwise the @head bytes from the start of @data are placed first,
 * followed by the remaining bytes taken from the very end of @data.
 */
static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
			      void *data, size_t head)
{
	char *dst = ref_buf;
	const char *src = data;
	size_t tail_len;

	if (head >= ref_size) {
		memcpy(dst, src + (head - ref_size), ref_size);
		return;
	}

	memcpy(dst, src, head);

	tail_len = ref_size - head;
	memcpy(dst + head, src + (buf_size - tail_len), tail_len);
}
1064 | ||
1065 | static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx, | |
1066 | struct auxtrace_mmap *mm, unsigned char *data, | |
1067 | u64 head) | |
1068 | { | |
1069 | struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx]; | |
1070 | bool wrapped; | |
1071 | ||
1072 | wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset, | |
1073 | ptr->snapshot_ref_buf_size, mm->len, | |
1074 | data, head); | |
1075 | ||
1076 | intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len, | |
1077 | data, head); | |
1078 | ||
1079 | return wrapped; | |
1080 | } | |
1081 | ||
1082 | static bool intel_pt_first_wrap(u64 *data, size_t buf_size) | |
1083 | { | |
1084 | int i, a, b; | |
1085 | ||
1086 | b = buf_size >> 3; | |
1087 | a = b - 512; | |
1088 | if (a < 0) | |
1089 | a = 0; | |
1090 | ||
1091 | for (i = a; i < b; i++) { | |
1092 | if (data[i]) | |
1093 | return true; | |
1094 | } | |
1095 | ||
1096 | return false; | |
1097 | } | |
1098 | ||
1099 | static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx, | |
1100 | struct auxtrace_mmap *mm, unsigned char *data, | |
1101 | u64 *head, u64 *old) | |
1102 | { | |
1103 | struct intel_pt_recording *ptr = | |
1104 | container_of(itr, struct intel_pt_recording, itr); | |
1105 | bool wrapped; | |
1106 | int err; | |
1107 | ||
1108 | pr_debug3("%s: mmap index %d old head %zu new head %zu\n", | |
1109 | __func__, idx, (size_t)*old, (size_t)*head); | |
1110 | ||
1111 | err = intel_pt_snapshot_init(ptr, mm->len); | |
1112 | if (err) | |
1113 | goto out_err; | |
1114 | ||
1115 | if (idx >= ptr->snapshot_ref_cnt) { | |
1116 | err = intel_pt_alloc_snapshot_refs(ptr, idx); | |
1117 | if (err) | |
1118 | goto out_err; | |
1119 | } | |
1120 | ||
1121 | if (ptr->snapshot_ref_buf_size) { | |
1122 | if (!ptr->snapshot_refs[idx].ref_buf) { | |
1123 | err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len); | |
1124 | if (err) | |
1125 | goto out_err; | |
1126 | } | |
1127 | wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head); | |
1128 | } else { | |
1129 | wrapped = ptr->snapshot_refs[idx].wrapped; | |
1130 | if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) { | |
1131 | ptr->snapshot_refs[idx].wrapped = true; | |
1132 | wrapped = true; | |
1133 | } | |
1134 | } | |
1135 | ||
1136 | /* | |
1137 | * In full trace mode 'head' continually increases. However in snapshot | |
1138 | * mode 'head' is an offset within the buffer. Here 'old' and 'head' | |
1139 | * are adjusted to match the full trace case which expects that 'old' is | |
1140 | * always less than 'head'. | |
1141 | */ | |
1142 | if (wrapped) { | |
1143 | *old = *head; | |
1144 | *head += mm->len; | |
1145 | } else { | |
1146 | if (mm->mask) | |
1147 | *old &= mm->mask; | |
1148 | else | |
1149 | *old %= mm->len; | |
1150 | if (*old > *head) | |
1151 | *head += mm->len; | |
1152 | } | |
1153 | ||
1154 | pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", | |
1155 | __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); | |
1156 | ||
1157 | return 0; | |
1158 | ||
1159 | out_err: | |
1160 | pr_err("%s: failed, error %d\n", __func__, err); | |
1161 | return err; | |
1162 | } | |
1163 | ||
1164 | static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) | |
1165 | { | |
1166 | return rdtsc(); | |
1167 | } | |
1168 | ||
90e457f7 AH |
1169 | struct auxtrace_record *intel_pt_recording_init(int *err) |
1170 | { | |
1171 | struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); | |
1172 | struct intel_pt_recording *ptr; | |
1173 | ||
1174 | if (!intel_pt_pmu) | |
1175 | return NULL; | |
1176 | ||
bd0c7a54 AH |
1177 | if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { |
1178 | *err = -errno; | |
1179 | return NULL; | |
1180 | } | |
1181 | ||
90e457f7 AH |
1182 | ptr = zalloc(sizeof(struct intel_pt_recording)); |
1183 | if (!ptr) { | |
1184 | *err = -ENOMEM; | |
1185 | return NULL; | |
1186 | } | |
1187 | ||
1188 | ptr->intel_pt_pmu = intel_pt_pmu; | |
ad60ba0c | 1189 | ptr->itr.pmu = intel_pt_pmu; |
90e457f7 AH |
1190 | ptr->itr.recording_options = intel_pt_recording_options; |
1191 | ptr->itr.info_priv_size = intel_pt_info_priv_size; | |
1192 | ptr->itr.info_fill = intel_pt_info_fill; | |
1193 | ptr->itr.free = intel_pt_recording_free; | |
1194 | ptr->itr.snapshot_start = intel_pt_snapshot_start; | |
1195 | ptr->itr.snapshot_finish = intel_pt_snapshot_finish; | |
1196 | ptr->itr.find_snapshot = intel_pt_find_snapshot; | |
1197 | ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; | |
1198 | ptr->itr.reference = intel_pt_reference; | |
ad60ba0c | 1199 | ptr->itr.read_finish = auxtrace_record__read_finish; |
c4ab2f0f AH |
1200 | /* |
1201 | * Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K | |
1202 | * should give at least 1 PSB per sample. | |
1203 | */ | |
1204 | ptr->itr.default_aux_sample_size = 4096; | |
90e457f7 AH |
1205 | return &ptr->itr; |
1206 | } |