perf intel-pt: Add support for efficient time interval filtering
[linux-2.6-block.git] tools/perf/util/intel-pt.c
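This version of the file adds efficient time interval filtering: selected time ranges are held in struct intel_pt (time_ranges/range_cnt), each queue tracks the next range boundary via sel_timestamp/sel_start, and intel_pt_fast_forward() lets the decoder skip ahead to the first selected range instead of decoding everything before it. As an illustrative example of the resulting workflow (not part of this file), decoding can be restricted to part of a trace with something like: perf script --itrace=i100us --time 10.0,11.0 (see the perf-script documentation for the exact --time syntax).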
1 /*
2  * intel_pt.c: Intel Processor Trace support
3  * Copyright (c) 2013-2015, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  */
15
16 #include <inttypes.h>
17 #include <stdio.h>
18 #include <stdbool.h>
19 #include <errno.h>
20 #include <linux/kernel.h>
21 #include <linux/types.h>
22
23 #include "../perf.h"
24 #include "session.h"
25 #include "machine.h"
26 #include "memswap.h"
27 #include "sort.h"
28 #include "tool.h"
29 #include "event.h"
30 #include "evlist.h"
31 #include "evsel.h"
32 #include "map.h"
33 #include "color.h"
34 #include "util.h"
35 #include "thread.h"
36 #include "thread-stack.h"
37 #include "symbol.h"
38 #include "callchain.h"
39 #include "dso.h"
40 #include "debug.h"
41 #include "auxtrace.h"
42 #include "tsc.h"
43 #include "intel-pt.h"
44 #include "config.h"
45 #include "time-utils.h"
46
47 #include "intel-pt-decoder/intel-pt-log.h"
48 #include "intel-pt-decoder/intel-pt-decoder.h"
49 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
50 #include "intel-pt-decoder/intel-pt-pkt-decoder.h"
51
52 #define MAX_TIMESTAMP (~0ULL)
53
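/*
 * One selected time range: 'start' and 'end' are in the same units as the
 * decoder timestamps they are compared against (see
 * intel_pt_setup_time_range() and intel_pt_setup_queue()).
 */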
54 struct range {
55         u64 start;
56         u64 end;
57 };
58
59 struct intel_pt {
60         struct auxtrace auxtrace;
61         struct auxtrace_queues queues;
62         struct auxtrace_heap heap;
63         u32 auxtrace_type;
64         struct perf_session *session;
65         struct machine *machine;
66         struct perf_evsel *switch_evsel;
67         struct thread *unknown_thread;
68         bool timeless_decoding;
69         bool sampling_mode;
70         bool snapshot_mode;
71         bool per_cpu_mmaps;
72         bool have_tsc;
73         bool data_queued;
74         bool est_tsc;
75         bool sync_switch;
76         bool mispred_all;
77         int have_sched_switch;
78         u32 pmu_type;
79         u64 kernel_start;
80         u64 switch_ip;
81         u64 ptss_ip;
82
83         struct perf_tsc_conversion tc;
84         bool cap_user_time_zero;
85
86         struct itrace_synth_opts synth_opts;
87
88         bool sample_instructions;
89         u64 instructions_sample_type;
90         u64 instructions_id;
91
92         bool sample_branches;
93         u32 branches_filter;
94         u64 branches_sample_type;
95         u64 branches_id;
96
97         bool sample_transactions;
98         u64 transactions_sample_type;
99         u64 transactions_id;
100
101         bool sample_ptwrites;
102         u64 ptwrites_sample_type;
103         u64 ptwrites_id;
104
105         bool sample_pwr_events;
106         u64 pwr_events_sample_type;
107         u64 mwait_id;
108         u64 pwre_id;
109         u64 exstop_id;
110         u64 pwrx_id;
111         u64 cbr_id;
112
113         u64 tsc_bit;
114         u64 mtc_bit;
115         u64 mtc_freq_bits;
116         u32 tsc_ctc_ratio_n;
117         u32 tsc_ctc_ratio_d;
118         u64 cyc_bit;
119         u64 noretcomp_bit;
120         unsigned max_non_turbo_ratio;
121         unsigned cbr2khz;
122
123         unsigned long num_events;
124
125         char *filter;
126         struct addr_filters filts;
127
128         struct range *time_ranges;
129         unsigned int range_cnt;
130 };
131
132 enum switch_state {
133         INTEL_PT_SS_NOT_TRACING,
134         INTEL_PT_SS_UNKNOWN,
135         INTEL_PT_SS_TRACING,
136         INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
137         INTEL_PT_SS_EXPECTING_SWITCH_IP,
138 };
139
140 struct intel_pt_queue {
141         struct intel_pt *pt;
142         unsigned int queue_nr;
143         struct auxtrace_buffer *buffer;
144         struct auxtrace_buffer *old_buffer;
145         void *decoder;
146         const struct intel_pt_state *state;
147         struct ip_callchain *chain;
148         struct branch_stack *last_branch;
149         struct branch_stack *last_branch_rb;
150         size_t last_branch_pos;
151         union perf_event *event_buf;
152         bool on_heap;
153         bool stop;
154         bool step_through_buffers;
155         bool use_buffer_pid_tid;
156         bool sync_switch;
157         pid_t pid, tid;
158         int cpu;
159         int switch_state;
160         pid_t next_tid;
161         struct thread *thread;
162         bool exclude_kernel;
163         bool have_sample;
164         u64 time;
165         u64 timestamp;
166         u64 sel_timestamp;
167         bool sel_start;
168         unsigned int sel_idx;
169         u32 flags;
170         u16 insn_len;
171         u64 last_insn_cnt;
172         u64 ipc_insn_cnt;
173         u64 ipc_cyc_cnt;
174         u64 last_in_insn_cnt;
175         u64 last_in_cyc_cnt;
176         u64 last_br_insn_cnt;
177         u64 last_br_cyc_cnt;
178         char insn[INTEL_PT_INSN_BUF_SZ];
179 };
180
181 static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
182                           unsigned char *buf, size_t len)
183 {
184         struct intel_pt_pkt packet;
185         size_t pos = 0;
186         int ret, pkt_len, i;
187         char desc[INTEL_PT_PKT_DESC_MAX];
188         const char *color = PERF_COLOR_BLUE;
189
190         color_fprintf(stdout, color,
191                       ". ... Intel Processor Trace data: size %zu bytes\n",
192                       len);
193
194         while (len) {
195                 ret = intel_pt_get_packet(buf, len, &packet);
196                 if (ret > 0)
197                         pkt_len = ret;
198                 else
199                         pkt_len = 1;
200                 printf(".");
201                 color_fprintf(stdout, color, "  %08zx: ", pos);
202                 for (i = 0; i < pkt_len; i++)
203                         color_fprintf(stdout, color, " %02x", buf[i]);
204                 for (; i < 16; i++)
205                         color_fprintf(stdout, color, "   ");
206                 if (ret > 0) {
207                         ret = intel_pt_pkt_desc(&packet, desc,
208                                                 INTEL_PT_PKT_DESC_MAX);
209                         if (ret > 0)
210                                 color_fprintf(stdout, color, " %s\n", desc);
211                 } else {
212                         color_fprintf(stdout, color, " Bad packet!\n");
213                 }
214                 pos += pkt_len;
215                 buf += pkt_len;
216                 len -= pkt_len;
217         }
218 }
219
220 static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
221                                 size_t len)
222 {
223         printf(".\n");
224         intel_pt_dump(pt, buf, len);
225 }
226
227 static void intel_pt_log_event(union perf_event *event)
228 {
229         FILE *f = intel_pt_log_fp();
230
231         if (!intel_pt_enable_logging || !f)
232                 return;
233
234         perf_event__fprintf(event, f);
235 }
236
237 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
238                                    struct auxtrace_buffer *b)
239 {
240         bool consecutive = false;
241         void *start;
242
243         start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
244                                       pt->have_tsc, &consecutive);
245         if (!start)
246                 return -EINVAL;
247         b->use_size = b->data + b->size - start;
248         b->use_data = start;
249         if (b->use_size && consecutive)
250                 b->consecutive = true;
251         return 0;
252 }
253
254 static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
255                                struct auxtrace_buffer *buffer,
256                                struct auxtrace_buffer *old_buffer,
257                                struct intel_pt_buffer *b)
258 {
259         bool might_overlap;
260
261         if (!buffer->data) {
262                 int fd = perf_data__fd(ptq->pt->session->data);
263
264                 buffer->data = auxtrace_buffer__get_data(buffer, fd);
265                 if (!buffer->data)
266                         return -ENOMEM;
267         }
268
269         might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
270         if (might_overlap && !buffer->consecutive && old_buffer &&
271             intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
272                 return -ENOMEM;
273
274         if (buffer->use_data) {
275                 b->len = buffer->use_size;
276                 b->buf = buffer->use_data;
277         } else {
278                 b->len = buffer->size;
279                 b->buf = buffer->data;
280         }
281         b->ref_timestamp = buffer->reference;
282
283         if (!old_buffer || (might_overlap && !buffer->consecutive)) {
284                 b->consecutive = false;
285                 b->trace_nr = buffer->buffer_nr + 1;
286         } else {
287                 b->consecutive = true;
288         }
289
290         return 0;
291 }
292
293 /* Do not drop buffers with references - refer intel_pt_get_trace() */
294 static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
295                                            struct auxtrace_buffer *buffer)
296 {
297         if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
298                 return;
299
300         auxtrace_buffer__drop_data(buffer);
301 }
302
303 /* Must be serialized with respect to intel_pt_get_trace() */
304 static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
305                               void *cb_data)
306 {
307         struct intel_pt_queue *ptq = data;
308         struct auxtrace_buffer *buffer = ptq->buffer;
309         struct auxtrace_buffer *old_buffer = ptq->old_buffer;
310         struct auxtrace_queue *queue;
311         int err = 0;
312
313         queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
314
315         while (1) {
316                 struct intel_pt_buffer b = { .len = 0 };
317
318                 buffer = auxtrace_buffer__next(queue, buffer);
319                 if (!buffer)
320                         break;
321
322                 err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
323                 if (err)
324                         break;
325
326                 if (b.len) {
327                         intel_pt_lookahead_drop_buffer(ptq, old_buffer);
328                         old_buffer = buffer;
329                 } else {
330                         intel_pt_lookahead_drop_buffer(ptq, buffer);
331                         continue;
332                 }
333
334                 err = cb(&b, cb_data);
335                 if (err)
336                         break;
337         }
338
339         if (buffer != old_buffer)
340                 intel_pt_lookahead_drop_buffer(ptq, buffer);
341         intel_pt_lookahead_drop_buffer(ptq, old_buffer);
342
343         return err;
344 }
345
346 /*
347  * This function assumes data is processed sequentially only.
348  * Must be serialized with respect to intel_pt_lookahead()
349  */
350 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
351 {
352         struct intel_pt_queue *ptq = data;
353         struct auxtrace_buffer *buffer = ptq->buffer;
354         struct auxtrace_buffer *old_buffer = ptq->old_buffer;
355         struct auxtrace_queue *queue;
356         int err;
357
358         if (ptq->stop) {
359                 b->len = 0;
360                 return 0;
361         }
362
363         queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
364
365         buffer = auxtrace_buffer__next(queue, buffer);
366         if (!buffer) {
367                 if (old_buffer)
368                         auxtrace_buffer__drop_data(old_buffer);
369                 b->len = 0;
370                 return 0;
371         }
372
373         ptq->buffer = buffer;
374
375         err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
376         if (err)
377                 return err;
378
379         if (ptq->step_through_buffers)
380                 ptq->stop = true;
381
382         if (b->len) {
383                 if (old_buffer)
384                         auxtrace_buffer__drop_data(old_buffer);
385                 ptq->old_buffer = buffer;
386         } else {
387                 auxtrace_buffer__drop_data(buffer);
388                 return intel_pt_get_trace(b, data);
389         }
390
391         return 0;
392 }
393
394 struct intel_pt_cache_entry {
395         struct auxtrace_cache_entry     entry;
396         u64                             insn_cnt;
397         u64                             byte_cnt;
398         enum intel_pt_insn_op           op;
399         enum intel_pt_insn_branch       branch;
400         int                             length;
401         int32_t                         rel;
402         char                            insn[INTEL_PT_INSN_BUF_SZ];
403 };
404
405 static int intel_pt_config_div(const char *var, const char *value, void *data)
406 {
407         int *d = data;
408         long val;
409
410         if (!strcmp(var, "intel-pt.cache-divisor")) {
411                 val = strtol(value, NULL, 0);
412                 if (val > 0 && val <= INT_MAX)
413                         *d = val;
414         }
415
416         return 0;
417 }
418
419 static int intel_pt_cache_divisor(void)
420 {
421         static int d;
422
423         if (d)
424                 return d;
425
426         perf_config(intel_pt_config_div, &d);
427
428         if (!d)
429                 d = 64;
430
431         return d;
432 }
433
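/*
 * Pick the number of hash bits for the per-dso instruction cache: roughly
 * log2 of the dso size divided by the configured divisor, clamped to the
 * range 10 to 21 bits (1K to 2M entries).
 */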
434 static unsigned int intel_pt_cache_size(struct dso *dso,
435                                         struct machine *machine)
436 {
437         off_t size;
438
439         size = dso__data_size(dso, machine);
440         size /= intel_pt_cache_divisor();
441         if (size < 1000)
442                 return 10;
443         if (size > (1 << 21))
444                 return 21;
445         return 32 - __builtin_clz(size);
446 }
447
448 static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
449                                              struct machine *machine)
450 {
451         struct auxtrace_cache *c;
452         unsigned int bits;
453
454         if (dso->auxtrace_cache)
455                 return dso->auxtrace_cache;
456
457         bits = intel_pt_cache_size(dso, machine);
458
459         /* Ignoring cache creation failure */
460         c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
461
462         dso->auxtrace_cache = c;
463
464         return c;
465 }
466
467 static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
468                               u64 offset, u64 insn_cnt, u64 byte_cnt,
469                               struct intel_pt_insn *intel_pt_insn)
470 {
471         struct auxtrace_cache *c = intel_pt_cache(dso, machine);
472         struct intel_pt_cache_entry *e;
473         int err;
474
475         if (!c)
476                 return -ENOMEM;
477
478         e = auxtrace_cache__alloc_entry(c);
479         if (!e)
480                 return -ENOMEM;
481
482         e->insn_cnt = insn_cnt;
483         e->byte_cnt = byte_cnt;
484         e->op = intel_pt_insn->op;
485         e->branch = intel_pt_insn->branch;
486         e->length = intel_pt_insn->length;
487         e->rel = intel_pt_insn->rel;
488         memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
489
490         err = auxtrace_cache__add(c, offset, &e->entry);
491         if (err)
492                 auxtrace_cache__free_entry(c, e);
493
494         return err;
495 }
496
497 static struct intel_pt_cache_entry *
498 intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
499 {
500         struct auxtrace_cache *c = intel_pt_cache(dso, machine);
501
502         if (!c)
503                 return NULL;
504
505         return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
506 }
507
508 static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
509 {
510         return ip >= pt->kernel_start ?
511                PERF_RECORD_MISC_KERNEL :
512                PERF_RECORD_MISC_USER;
513 }
514
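/*
 * Walk instructions starting at *ip until a branch is found, 'to_ip' is
 * reached, or 'max_insn_cnt' instructions have been walked.  Stretches of
 * instructions that have been walked before are cached per dso, keyed by
 * file offset, so they can be skipped next time.
 */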
515 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
516                                    uint64_t *insn_cnt_ptr, uint64_t *ip,
517                                    uint64_t to_ip, uint64_t max_insn_cnt,
518                                    void *data)
519 {
520         struct intel_pt_queue *ptq = data;
521         struct machine *machine = ptq->pt->machine;
522         struct thread *thread;
523         struct addr_location al;
524         unsigned char buf[INTEL_PT_INSN_BUF_SZ];
525         ssize_t len;
526         int x86_64;
527         u8 cpumode;
528         u64 offset, start_offset, start_ip;
529         u64 insn_cnt = 0;
530         bool one_map = true;
531
532         intel_pt_insn->length = 0;
533
534         if (to_ip && *ip == to_ip)
535                 goto out_no_cache;
536
537         cpumode = intel_pt_cpumode(ptq->pt, *ip);
538
539         thread = ptq->thread;
540         if (!thread) {
541                 if (cpumode != PERF_RECORD_MISC_KERNEL)
542                         return -EINVAL;
543                 thread = ptq->pt->unknown_thread;
544         }
545
546         while (1) {
547                 if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
548                         return -EINVAL;
549
550                 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
551                     dso__data_status_seen(al.map->dso,
552                                           DSO_DATA_STATUS_SEEN_ITRACE))
553                         return -ENOENT;
554
555                 offset = al.map->map_ip(al.map, *ip);
556
557                 if (!to_ip && one_map) {
558                         struct intel_pt_cache_entry *e;
559
560                         e = intel_pt_cache_lookup(al.map->dso, machine, offset);
561                         if (e &&
562                             (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
563                                 *insn_cnt_ptr = e->insn_cnt;
564                                 *ip += e->byte_cnt;
565                                 intel_pt_insn->op = e->op;
566                                 intel_pt_insn->branch = e->branch;
567                                 intel_pt_insn->length = e->length;
568                                 intel_pt_insn->rel = e->rel;
569                                 memcpy(intel_pt_insn->buf, e->insn,
570                                        INTEL_PT_INSN_BUF_SZ);
571                                 intel_pt_log_insn_no_data(intel_pt_insn, *ip);
572                                 return 0;
573                         }
574                 }
575
576                 start_offset = offset;
577                 start_ip = *ip;
578
579                 /* Load maps to ensure dso->is_64_bit has been updated */
580                 map__load(al.map);
581
582                 x86_64 = al.map->dso->is_64_bit;
583
584                 while (1) {
585                         len = dso__data_read_offset(al.map->dso, machine,
586                                                     offset, buf,
587                                                     INTEL_PT_INSN_BUF_SZ);
588                         if (len <= 0)
589                                 return -EINVAL;
590
591                         if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
592                                 return -EINVAL;
593
594                         intel_pt_log_insn(intel_pt_insn, *ip);
595
596                         insn_cnt += 1;
597
598                         if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
599                                 goto out;
600
601                         if (max_insn_cnt && insn_cnt >= max_insn_cnt)
602                                 goto out_no_cache;
603
604                         *ip += intel_pt_insn->length;
605
606                         if (to_ip && *ip == to_ip)
607                                 goto out_no_cache;
608
609                         if (*ip >= al.map->end)
610                                 break;
611
612                         offset += intel_pt_insn->length;
613                 }
614                 one_map = false;
615         }
616 out:
617         *insn_cnt_ptr = insn_cnt;
618
619         if (!one_map)
620                 goto out_no_cache;
621
622         /*
623          * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
624          * entries.
625          */
626         if (to_ip) {
627                 struct intel_pt_cache_entry *e;
628
629                 e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
630                 if (e)
631                         return 0;
632         }
633
634         /* Ignore cache errors */
635         intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
636                            *ip - start_ip, intel_pt_insn);
637
638         return 0;
639
640 out_no_cache:
641         *insn_cnt_ptr = insn_cnt;
642         return 0;
643 }
644
645 static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
646                                   uint64_t offset, const char *filename)
647 {
648         struct addr_filter *filt;
649         bool have_filter   = false;
650         bool hit_tracestop = false;
651         bool hit_filter    = false;
652
653         list_for_each_entry(filt, &pt->filts.head, list) {
654                 if (filt->start)
655                         have_filter = true;
656
657                 if ((filename && !filt->filename) ||
658                     (!filename && filt->filename) ||
659                     (filename && strcmp(filename, filt->filename)))
660                         continue;
661
662                 if (!(offset >= filt->addr && offset < filt->addr + filt->size))
663                         continue;
664
665                 intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
666                              ip, offset, filename ? filename : "[kernel]",
667                              filt->start ? "filter" : "stop",
668                              filt->addr, filt->size);
669
670                 if (filt->start)
671                         hit_filter = true;
672                 else
673                         hit_tracestop = true;
674         }
675
676         if (!hit_tracestop && !hit_filter)
677                 intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
678                              ip, offset, filename ? filename : "[kernel]");
679
680         return hit_tracestop || (have_filter && !hit_filter);
681 }
682
683 static int __intel_pt_pgd_ip(uint64_t ip, void *data)
684 {
685         struct intel_pt_queue *ptq = data;
686         struct thread *thread;
687         struct addr_location al;
688         u8 cpumode;
689         u64 offset;
690
691         if (ip >= ptq->pt->kernel_start)
692                 return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
693
694         cpumode = PERF_RECORD_MISC_USER;
695
696         thread = ptq->thread;
697         if (!thread)
698                 return -EINVAL;
699
700         if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
701                 return -EINVAL;
702
703         offset = al.map->map_ip(al.map, ip);
704
705         return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
706                                      al.map->dso->long_name);
707 }
708
709 static bool intel_pt_pgd_ip(uint64_t ip, void *data)
710 {
711         return __intel_pt_pgd_ip(ip, data) > 0;
712 }
713
714 static bool intel_pt_get_config(struct intel_pt *pt,
715                                 struct perf_event_attr *attr, u64 *config)
716 {
717         if (attr->type == pt->pmu_type) {
718                 if (config)
719                         *config = attr->config;
720                 return true;
721         }
722
723         return false;
724 }
725
726 static bool intel_pt_exclude_kernel(struct intel_pt *pt)
727 {
728         struct perf_evsel *evsel;
729
730         evlist__for_each_entry(pt->session->evlist, evsel) {
731                 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
732                     !evsel->attr.exclude_kernel)
733                         return false;
734         }
735         return true;
736 }
737
738 static bool intel_pt_return_compression(struct intel_pt *pt)
739 {
740         struct perf_evsel *evsel;
741         u64 config;
742
743         if (!pt->noretcomp_bit)
744                 return true;
745
746         evlist__for_each_entry(pt->session->evlist, evsel) {
747                 if (intel_pt_get_config(pt, &evsel->attr, &config) &&
748                     (config & pt->noretcomp_bit))
749                         return false;
750         }
751         return true;
752 }
753
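/*
 * Branch tracing is considered disabled if any Intel PT event was
 * configured with the 'pt' bit (config bit 0) set but the branch enable
 * bit (config bit 13, 0x2000) clear.
 */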
754 static bool intel_pt_branch_enable(struct intel_pt *pt)
755 {
756         struct perf_evsel *evsel;
757         u64 config;
758
759         evlist__for_each_entry(pt->session->evlist, evsel) {
760                 if (intel_pt_get_config(pt, &evsel->attr, &config) &&
761                     (config & 1) && !(config & 0x2000))
762                         return false;
763         }
764         return true;
765 }
766
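/*
 * Extract the MTC period from the event config: mtc_freq_bits is the mask
 * of the mtc_period bit-field, so shift the masked value down to its
 * lowest bit to get the period setting.
 */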
767 static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
768 {
769         struct perf_evsel *evsel;
770         unsigned int shift;
771         u64 config;
772
773         if (!pt->mtc_freq_bits)
774                 return 0;
775
776         for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
777                 config >>= 1;
778
779         evlist__for_each_entry(pt->session->evlist, evsel) {
780                 if (intel_pt_get_config(pt, &evsel->attr, &config))
781                         return (config & pt->mtc_freq_bits) >> shift;
782         }
783         return 0;
784 }
785
786 static bool intel_pt_timeless_decoding(struct intel_pt *pt)
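/*
 * Decoding is "timeless" (timestamps are not used) unless TSC was enabled
 * in the Intel PT config, the events sample time, and the TSC-to-perf-time
 * conversion is available.
 */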
787 {
788         struct perf_evsel *evsel;
789         bool timeless_decoding = true;
790         u64 config;
791
792         if (!pt->tsc_bit || !pt->cap_user_time_zero)
793                 return true;
794
795         evlist__for_each_entry(pt->session->evlist, evsel) {
796                 if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
797                         return true;
798                 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
799                         if (config & pt->tsc_bit)
800                                 timeless_decoding = false;
801                         else
802                                 return true;
803                 }
804         }
805         return timeless_decoding;
806 }
807
808 static bool intel_pt_tracing_kernel(struct intel_pt *pt)
809 {
810         struct perf_evsel *evsel;
811
812         evlist__for_each_entry(pt->session->evlist, evsel) {
813                 if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
814                     !evsel->attr.exclude_kernel)
815                         return true;
816         }
817         return false;
818 }
819
820 static bool intel_pt_have_tsc(struct intel_pt *pt)
821 {
822         struct perf_evsel *evsel;
823         bool have_tsc = false;
824         u64 config;
825
826         if (!pt->tsc_bit)
827                 return false;
828
829         evlist__for_each_entry(pt->session->evlist, evsel) {
830                 if (intel_pt_get_config(pt, &evsel->attr, &config)) {
831                         if (config & pt->tsc_bit)
832                                 have_tsc = true;
833                         else
834                                 return false;
835                 }
836         }
837         return have_tsc;
838 }
839
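/*
 * Convert nanoseconds to TSC ticks by inverting perf's time conversion,
 * i.e. ticks ~= (ns << time_shift) / time_mult.  The quotient and
 * remainder are shifted separately to avoid overflowing 64 bits when
 * shifting ns.
 */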
840 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
841 {
842         u64 quot, rem;
843
844         quot = ns / pt->tc.time_mult;
845         rem  = ns % pt->tc.time_mult;
846         return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
847                 pt->tc.time_mult;
848 }
849
850 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
851                                                    unsigned int queue_nr)
852 {
853         struct intel_pt_params params = { .get_trace = 0, };
854         struct perf_env *env = pt->machine->env;
855         struct intel_pt_queue *ptq;
856
857         ptq = zalloc(sizeof(struct intel_pt_queue));
858         if (!ptq)
859                 return NULL;
860
861         if (pt->synth_opts.callchain) {
862                 size_t sz = sizeof(struct ip_callchain);
863
864                 /* Add 1 to callchain_sz for callchain context */
865                 sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
866                 ptq->chain = zalloc(sz);
867                 if (!ptq->chain)
868                         goto out_free;
869         }
870
871         if (pt->synth_opts.last_branch) {
872                 size_t sz = sizeof(struct branch_stack);
873
874                 sz += pt->synth_opts.last_branch_sz *
875                       sizeof(struct branch_entry);
876                 ptq->last_branch = zalloc(sz);
877                 if (!ptq->last_branch)
878                         goto out_free;
879                 ptq->last_branch_rb = zalloc(sz);
880                 if (!ptq->last_branch_rb)
881                         goto out_free;
882         }
883
884         ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
885         if (!ptq->event_buf)
886                 goto out_free;
887
888         ptq->pt = pt;
889         ptq->queue_nr = queue_nr;
890         ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
891         ptq->pid = -1;
892         ptq->tid = -1;
893         ptq->cpu = -1;
894         ptq->next_tid = -1;
895
896         params.get_trace = intel_pt_get_trace;
897         params.walk_insn = intel_pt_walk_next_insn;
898         params.lookahead = intel_pt_lookahead;
899         params.data = ptq;
900         params.return_compression = intel_pt_return_compression(pt);
901         params.branch_enable = intel_pt_branch_enable(pt);
902         params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
903         params.mtc_period = intel_pt_mtc_period(pt);
904         params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
905         params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
906
907         if (pt->filts.cnt > 0)
908                 params.pgd_ip = intel_pt_pgd_ip;
909
910         if (pt->synth_opts.instructions) {
911                 if (pt->synth_opts.period) {
912                         switch (pt->synth_opts.period_type) {
913                         case PERF_ITRACE_PERIOD_INSTRUCTIONS:
914                                 params.period_type =
915                                                 INTEL_PT_PERIOD_INSTRUCTIONS;
916                                 params.period = pt->synth_opts.period;
917                                 break;
918                         case PERF_ITRACE_PERIOD_TICKS:
919                                 params.period_type = INTEL_PT_PERIOD_TICKS;
920                                 params.period = pt->synth_opts.period;
921                                 break;
922                         case PERF_ITRACE_PERIOD_NANOSECS:
923                                 params.period_type = INTEL_PT_PERIOD_TICKS;
924                                 params.period = intel_pt_ns_to_ticks(pt,
925                                                         pt->synth_opts.period);
926                                 break;
927                         default:
928                                 break;
929                         }
930                 }
931
932                 if (!params.period) {
933                         params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
934                         params.period = 1;
935                 }
936         }
937
938         if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
939                 params.flags |= INTEL_PT_FUP_WITH_NLIP;
940
941         ptq->decoder = intel_pt_decoder_new(&params);
942         if (!ptq->decoder)
943                 goto out_free;
944
945         return ptq;
946
947 out_free:
948         zfree(&ptq->event_buf);
949         zfree(&ptq->last_branch);
950         zfree(&ptq->last_branch_rb);
951         zfree(&ptq->chain);
952         free(ptq);
953         return NULL;
954 }
955
956 static void intel_pt_free_queue(void *priv)
957 {
958         struct intel_pt_queue *ptq = priv;
959
960         if (!ptq)
961                 return;
962         thread__zput(ptq->thread);
963         intel_pt_decoder_free(ptq->decoder);
964         zfree(&ptq->event_buf);
965         zfree(&ptq->last_branch);
966         zfree(&ptq->last_branch_rb);
967         zfree(&ptq->chain);
968         free(ptq);
969 }
970
971 static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
972                                      struct auxtrace_queue *queue)
973 {
974         struct intel_pt_queue *ptq = queue->priv;
975
976         if (queue->tid == -1 || pt->have_sched_switch) {
977                 ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
978                 thread__zput(ptq->thread);
979         }
980
981         if (!ptq->thread && ptq->tid != -1)
982                 ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
983
984         if (ptq->thread) {
985                 ptq->pid = ptq->thread->pid_;
986                 if (queue->cpu == -1)
987                         ptq->cpu = ptq->thread->cpu;
988         }
989 }
990
991 static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
992 {
993         if (ptq->state->flags & INTEL_PT_ABORT_TX) {
994                 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
995         } else if (ptq->state->flags & INTEL_PT_ASYNC) {
996                 if (ptq->state->to_ip)
997                         ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
998                                      PERF_IP_FLAG_ASYNC |
999                                      PERF_IP_FLAG_INTERRUPT;
1000                 else
1001                         ptq->flags = PERF_IP_FLAG_BRANCH |
1002                                      PERF_IP_FLAG_TRACE_END;
1003                 ptq->insn_len = 0;
1004         } else {
1005                 if (ptq->state->from_ip)
1006                         ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
1007                 else
1008                         ptq->flags = PERF_IP_FLAG_BRANCH |
1009                                      PERF_IP_FLAG_TRACE_BEGIN;
1010                 if (ptq->state->flags & INTEL_PT_IN_TX)
1011                         ptq->flags |= PERF_IP_FLAG_IN_TX;
1012                 ptq->insn_len = ptq->state->insn_len;
1013                 memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
1014         }
1015
1016         if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
1017                 ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
1018         if (ptq->state->type & INTEL_PT_TRACE_END)
1019                 ptq->flags |= PERF_IP_FLAG_TRACE_END;
1020 }
1021
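/*
 * Point the queue at the first selected time range.  sel_start is true
 * when sel_timestamp is the start of a range (tracing not yet selected);
 * if the first range starts at time zero, sel_timestamp is instead the
 * end of that range and sel_start is false.
 */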
1022 static void intel_pt_setup_time_range(struct intel_pt *pt,
1023                                       struct intel_pt_queue *ptq)
1024 {
1025         if (!pt->range_cnt)
1026                 return;
1027
1028         ptq->sel_timestamp = pt->time_ranges[0].start;
1029         ptq->sel_idx = 0;
1030
1031         if (ptq->sel_timestamp) {
1032                 ptq->sel_start = true;
1033         } else {
1034                 ptq->sel_timestamp = pt->time_ranges[0].end;
1035                 ptq->sel_start = false;
1036         }
1037 }
1038
1039 static int intel_pt_setup_queue(struct intel_pt *pt,
1040                                 struct auxtrace_queue *queue,
1041                                 unsigned int queue_nr)
1042 {
1043         struct intel_pt_queue *ptq = queue->priv;
1044
1045         if (list_empty(&queue->head))
1046                 return 0;
1047
1048         if (!ptq) {
1049                 ptq = intel_pt_alloc_queue(pt, queue_nr);
1050                 if (!ptq)
1051                         return -ENOMEM;
1052                 queue->priv = ptq;
1053
1054                 if (queue->cpu != -1)
1055                         ptq->cpu = queue->cpu;
1056                 ptq->tid = queue->tid;
1057
1058                 if (pt->sampling_mode && !pt->snapshot_mode &&
1059                     pt->timeless_decoding)
1060                         ptq->step_through_buffers = true;
1061
1062                 ptq->sync_switch = pt->sync_switch;
1063
1064                 intel_pt_setup_time_range(pt, ptq);
1065         }
1066
1067         if (!ptq->on_heap &&
1068             (!ptq->sync_switch ||
1069              ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
1070                 const struct intel_pt_state *state;
1071                 int ret;
1072
1073                 if (pt->timeless_decoding)
1074                         return 0;
1075
1076                 intel_pt_log("queue %u getting timestamp\n", queue_nr);
1077                 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1078                              queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1079
1080                 if (ptq->sel_start && ptq->sel_timestamp) {
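                /*
                 * If the first selected time range starts later, fast
                 * forward the decoder to it so that data before the range
                 * does not all have to be decoded.
                 */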
1081                         ret = intel_pt_fast_forward(ptq->decoder,
1082                                                     ptq->sel_timestamp);
1083                         if (ret)
1084                                 return ret;
1085                 }
1086
1087                 while (1) {
1088                         state = intel_pt_decode(ptq->decoder);
1089                         if (state->err) {
1090                                 if (state->err == INTEL_PT_ERR_NODATA) {
1091                                         intel_pt_log("queue %u has no timestamp\n",
1092                                                      queue_nr);
1093                                         return 0;
1094                                 }
1095                                 continue;
1096                         }
1097                         if (state->timestamp)
1098                                 break;
1099                 }
1100
1101                 ptq->timestamp = state->timestamp;
1102                 intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
1103                              queue_nr, ptq->timestamp);
1104                 ptq->state = state;
1105                 ptq->have_sample = true;
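                /*
                 * Do not synthesize a sample for data that is before the
                 * start of the first selected time range.
                 */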
1106                 if (ptq->sel_start && ptq->sel_timestamp &&
1107                     ptq->timestamp < ptq->sel_timestamp)
1108                         ptq->have_sample = false;
1109                 intel_pt_sample_flags(ptq);
1110                 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
1111                 if (ret)
1112                         return ret;
1113                 ptq->on_heap = true;
1114         }
1115
1116         return 0;
1117 }
1118
1119 static int intel_pt_setup_queues(struct intel_pt *pt)
1120 {
1121         unsigned int i;
1122         int ret;
1123
1124         for (i = 0; i < pt->queues.nr_queues; i++) {
1125                 ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
1126                 if (ret)
1127                         return ret;
1128         }
1129         return 0;
1130 }
1131
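/*
 * The last branch ring buffer is filled backwards from last_branch_pos, so
 * copying the tail [last_branch_pos, end) followed by the head
 * [0, last_branch_pos) produces entries ordered most recent first, which
 * is the order perf expects for a branch stack.
 */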
1132 static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
1133 {
1134         struct branch_stack *bs_src = ptq->last_branch_rb;
1135         struct branch_stack *bs_dst = ptq->last_branch;
1136         size_t nr = 0;
1137
1138         bs_dst->nr = bs_src->nr;
1139
1140         if (!bs_src->nr)
1141                 return;
1142
1143         nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
1144         memcpy(&bs_dst->entries[0],
1145                &bs_src->entries[ptq->last_branch_pos],
1146                sizeof(struct branch_entry) * nr);
1147
1148         if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
1149                 memcpy(&bs_dst->entries[nr],
1150                        &bs_src->entries[0],
1151                        sizeof(struct branch_entry) * ptq->last_branch_pos);
1152         }
1153 }
1154
1155 static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
1156 {
1157         ptq->last_branch_pos = 0;
1158         ptq->last_branch_rb->nr = 0;
1159 }
1160
1161 static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
1162 {
1163         const struct intel_pt_state *state = ptq->state;
1164         struct branch_stack *bs = ptq->last_branch_rb;
1165         struct branch_entry *be;
1166
1167         if (!ptq->last_branch_pos)
1168                 ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
1169
1170         ptq->last_branch_pos -= 1;
1171
1172         be              = &bs->entries[ptq->last_branch_pos];
1173         be->from        = state->from_ip;
1174         be->to          = state->to_ip;
1175         be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
1176         be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
1177         /* No support for mispredict */
1178         be->flags.mispred = ptq->pt->mispred_all;
1179
1180         if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
1181                 bs->nr += 1;
1182 }
1183
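/* Skip the first 'initial_skip' synthesized events (itrace 's' option) */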
1184 static inline bool intel_pt_skip_event(struct intel_pt *pt)
1185 {
1186         return pt->synth_opts.initial_skip &&
1187                pt->num_events++ < pt->synth_opts.initial_skip;
1188 }
1189
1190 static void intel_pt_prep_b_sample(struct intel_pt *pt,
1191                                    struct intel_pt_queue *ptq,
1192                                    union perf_event *event,
1193                                    struct perf_sample *sample)
1194 {
1195         if (!pt->timeless_decoding)
1196                 sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1197
1198         sample->ip = ptq->state->from_ip;
1199         sample->cpumode = intel_pt_cpumode(pt, sample->ip);
1200         sample->pid = ptq->pid;
1201         sample->tid = ptq->tid;
1202         sample->addr = ptq->state->to_ip;
1203         sample->period = 1;
1204         sample->cpu = ptq->cpu;
1205         sample->flags = ptq->flags;
1206         sample->insn_len = ptq->insn_len;
1207         memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1208
1209         event->sample.header.type = PERF_RECORD_SAMPLE;
1210         event->sample.header.misc = sample->cpumode;
1211         event->sample.header.size = sizeof(struct perf_event_header);
1212 }
1213
1214 static int intel_pt_inject_event(union perf_event *event,
1215                                  struct perf_sample *sample, u64 type)
1216 {
1217         event->header.size = perf_event__sample_event_size(sample, type, 0);
1218         return perf_event__synthesize_sample(event, type, 0, sample);
1219 }
1220
1221 static inline int intel_pt_opt_inject(struct intel_pt *pt,
1222                                       union perf_event *event,
1223                                       struct perf_sample *sample, u64 type)
1224 {
1225         if (!pt->synth_opts.inject)
1226                 return 0;
1227
1228         return intel_pt_inject_event(event, sample, type);
1229 }
1230
1231 static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
1232                                           union perf_event *event,
1233                                           struct perf_sample *sample, u64 type)
1234 {
1235         int ret;
1236
1237         ret = intel_pt_opt_inject(pt, event, sample, type);
1238         if (ret)
1239                 return ret;
1240
1241         ret = perf_session__deliver_synth_event(pt->session, event, sample);
1242         if (ret)
1243                 pr_err("Intel PT: failed to deliver event, error %d\n", ret);
1244
1245         return ret;
1246 }
1247
1248 static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
1249 {
1250         struct intel_pt *pt = ptq->pt;
1251         union perf_event *event = ptq->event_buf;
1252         struct perf_sample sample = { .ip = 0, };
1253         struct dummy_branch_stack {
1254                 u64                     nr;
1255                 struct branch_entry     entries;
1256         } dummy_bs;
1257
1258         if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
1259                 return 0;
1260
1261         if (intel_pt_skip_event(pt))
1262                 return 0;
1263
1264         intel_pt_prep_b_sample(pt, ptq, event, &sample);
1265
1266         sample.id = ptq->pt->branches_id;
1267         sample.stream_id = ptq->pt->branches_id;
1268
1269         /*
1270          * perf report cannot handle events without a branch stack when using
1271          * SORT_MODE__BRANCH so make a dummy one.
1272          */
1273         if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
1274                 dummy_bs = (struct dummy_branch_stack){
1275                         .nr = 1,
1276                         .entries = {
1277                                 .from = sample.ip,
1278                                 .to = sample.addr,
1279                         },
1280                 };
1281                 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1282         }
1283
1284         sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
1285         if (sample.cyc_cnt) {
1286                 sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
1287                 ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
1288                 ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
1289         }
1290
1291         return intel_pt_deliver_synth_b_event(pt, event, &sample,
1292                                               pt->branches_sample_type);
1293 }
1294
1295 static void intel_pt_prep_sample(struct intel_pt *pt,
1296                                  struct intel_pt_queue *ptq,
1297                                  union perf_event *event,
1298                                  struct perf_sample *sample)
1299 {
1300         intel_pt_prep_b_sample(pt, ptq, event, sample);
1301
1302         if (pt->synth_opts.callchain) {
1303                 thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
1304                                      pt->synth_opts.callchain_sz + 1,
1305                                      sample->ip, pt->kernel_start);
1306                 sample->callchain = ptq->chain;
1307         }
1308
1309         if (pt->synth_opts.last_branch) {
1310                 intel_pt_copy_last_branch_rb(ptq);
1311                 sample->branch_stack = ptq->last_branch;
1312         }
1313 }
1314
1315 static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
1316                                                struct intel_pt_queue *ptq,
1317                                                union perf_event *event,
1318                                                struct perf_sample *sample,
1319                                                u64 type)
1320 {
1321         int ret;
1322
1323         ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
1324
1325         if (pt->synth_opts.last_branch)
1326                 intel_pt_reset_last_branch_rb(ptq);
1327
1328         return ret;
1329 }
1330
1331 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
1332 {
1333         struct intel_pt *pt = ptq->pt;
1334         union perf_event *event = ptq->event_buf;
1335         struct perf_sample sample = { .ip = 0, };
1336
1337         if (intel_pt_skip_event(pt))
1338                 return 0;
1339
1340         intel_pt_prep_sample(pt, ptq, event, &sample);
1341
1342         sample.id = ptq->pt->instructions_id;
1343         sample.stream_id = ptq->pt->instructions_id;
1344         sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1345
1346         sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
1347         if (sample.cyc_cnt) {
1348                 sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
1349                 ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
1350                 ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
1351         }
1352
1353         ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
1354
1355         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1356                                             pt->instructions_sample_type);
1357 }
1358
1359 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1360 {
1361         struct intel_pt *pt = ptq->pt;
1362         union perf_event *event = ptq->event_buf;
1363         struct perf_sample sample = { .ip = 0, };
1364
1365         if (intel_pt_skip_event(pt))
1366                 return 0;
1367
1368         intel_pt_prep_sample(pt, ptq, event, &sample);
1369
1370         sample.id = ptq->pt->transactions_id;
1371         sample.stream_id = ptq->pt->transactions_id;
1372
1373         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1374                                             pt->transactions_sample_type);
1375 }
1376
1377 static void intel_pt_prep_p_sample(struct intel_pt *pt,
1378                                    struct intel_pt_queue *ptq,
1379                                    union perf_event *event,
1380                                    struct perf_sample *sample)
1381 {
1382         intel_pt_prep_sample(pt, ptq, event, sample);
1383
1384         /*
1385          * Zero IP is used to mean "trace start" but that is not the case for
1386          * power or PTWRITE events with no IP, so clear the flags.
1387          */
1388         if (!sample->ip)
1389                 sample->flags = 0;
1390 }
1391
1392 static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
1393 {
1394         struct intel_pt *pt = ptq->pt;
1395         union perf_event *event = ptq->event_buf;
1396         struct perf_sample sample = { .ip = 0, };
1397         struct perf_synth_intel_ptwrite raw;
1398
1399         if (intel_pt_skip_event(pt))
1400                 return 0;
1401
1402         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1403
1404         sample.id = ptq->pt->ptwrites_id;
1405         sample.stream_id = ptq->pt->ptwrites_id;
1406
1407         raw.flags = 0;
1408         raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
1409         raw.payload = cpu_to_le64(ptq->state->ptw_payload);
1410
1411         sample.raw_size = perf_synth__raw_size(raw);
1412         sample.raw_data = perf_synth__raw_data(&raw);
1413
1414         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1415                                             pt->ptwrites_sample_type);
1416 }
1417
1418 static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
1419 {
1420         struct intel_pt *pt = ptq->pt;
1421         union perf_event *event = ptq->event_buf;
1422         struct perf_sample sample = { .ip = 0, };
1423         struct perf_synth_intel_cbr raw;
1424         u32 flags;
1425
1426         if (intel_pt_skip_event(pt))
1427                 return 0;
1428
1429         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1430
1431         sample.id = ptq->pt->cbr_id;
1432         sample.stream_id = ptq->pt->cbr_id;
1433
1434         flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
1435         raw.flags = cpu_to_le32(flags);
1436         raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
1437         raw.reserved3 = 0;
1438
1439         sample.raw_size = perf_synth__raw_size(raw);
1440         sample.raw_data = perf_synth__raw_data(&raw);
1441
1442         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1443                                             pt->pwr_events_sample_type);
1444 }
1445
1446 static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
1447 {
1448         struct intel_pt *pt = ptq->pt;
1449         union perf_event *event = ptq->event_buf;
1450         struct perf_sample sample = { .ip = 0, };
1451         struct perf_synth_intel_mwait raw;
1452
1453         if (intel_pt_skip_event(pt))
1454                 return 0;
1455
1456         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1457
1458         sample.id = ptq->pt->mwait_id;
1459         sample.stream_id = ptq->pt->mwait_id;
1460
1461         raw.reserved = 0;
1462         raw.payload = cpu_to_le64(ptq->state->mwait_payload);
1463
1464         sample.raw_size = perf_synth__raw_size(raw);
1465         sample.raw_data = perf_synth__raw_data(&raw);
1466
1467         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1468                                             pt->pwr_events_sample_type);
1469 }
1470
1471 static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
1472 {
1473         struct intel_pt *pt = ptq->pt;
1474         union perf_event *event = ptq->event_buf;
1475         struct perf_sample sample = { .ip = 0, };
1476         struct perf_synth_intel_pwre raw;
1477
1478         if (intel_pt_skip_event(pt))
1479                 return 0;
1480
1481         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1482
1483         sample.id = ptq->pt->pwre_id;
1484         sample.stream_id = ptq->pt->pwre_id;
1485
1486         raw.reserved = 0;
1487         raw.payload = cpu_to_le64(ptq->state->pwre_payload);
1488
1489         sample.raw_size = perf_synth__raw_size(raw);
1490         sample.raw_data = perf_synth__raw_data(&raw);
1491
1492         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1493                                             pt->pwr_events_sample_type);
1494 }
1495
1496 static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
1497 {
1498         struct intel_pt *pt = ptq->pt;
1499         union perf_event *event = ptq->event_buf;
1500         struct perf_sample sample = { .ip = 0, };
1501         struct perf_synth_intel_exstop raw;
1502
1503         if (intel_pt_skip_event(pt))
1504                 return 0;
1505
1506         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1507
1508         sample.id = ptq->pt->exstop_id;
1509         sample.stream_id = ptq->pt->exstop_id;
1510
1511         raw.flags = 0;
1512         raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
1513
1514         sample.raw_size = perf_synth__raw_size(raw);
1515         sample.raw_data = perf_synth__raw_data(&raw);
1516
1517         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1518                                             pt->pwr_events_sample_type);
1519 }
1520
1521 static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
1522 {
1523         struct intel_pt *pt = ptq->pt;
1524         union perf_event *event = ptq->event_buf;
1525         struct perf_sample sample = { .ip = 0, };
1526         struct perf_synth_intel_pwrx raw;
1527
1528         if (intel_pt_skip_event(pt))
1529                 return 0;
1530
1531         intel_pt_prep_p_sample(pt, ptq, event, &sample);
1532
1533         sample.id = ptq->pt->pwrx_id;
1534         sample.stream_id = ptq->pt->pwrx_id;
1535
1536         raw.reserved = 0;
1537         raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
1538
1539         sample.raw_size = perf_synth__raw_size(raw);
1540         sample.raw_data = perf_synth__raw_data(&raw);
1541
1542         return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1543                                             pt->pwr_events_sample_type);
1544 }
1545
1546 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1547                                 pid_t pid, pid_t tid, u64 ip, u64 timestamp)
1548 {
1549         union perf_event event;
1550         char msg[MAX_AUXTRACE_ERROR_MSG];
1551         int err;
1552
1553         intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1554
1555         auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1556                              code, cpu, pid, tid, ip, msg, timestamp);
1557
1558         err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1559         if (err)
1560                 pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1561                        err);
1562
1563         return err;
1564 }
1565
1566 static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
1567                                  const struct intel_pt_state *state)
1568 {
1569         struct intel_pt *pt = ptq->pt;
1570         u64 tm = ptq->timestamp;
1571
1572         tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);
1573
1574         return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid,
1575                                     ptq->tid, state->from_ip, tm);
1576 }
1577
1578 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1579 {
1580         struct auxtrace_queue *queue;
1581         pid_t tid = ptq->next_tid;
1582         int err;
1583
1584         if (tid == -1)
1585                 return 0;
1586
1587         intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1588
1589         err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1590
1591         queue = &pt->queues.queue_array[ptq->queue_nr];
1592         intel_pt_set_pid_tid_cpu(pt, queue);
1593
1594         ptq->next_tid = -1;
1595
1596         return err;
1597 }
1598
1599 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1600 {
1601         struct intel_pt *pt = ptq->pt;
1602
1603         return ip == pt->switch_ip &&
1604                (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1605                !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1606                                PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1607 }
1608
1609 #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
1610                           INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \
1611                           INTEL_PT_CBR_CHG)
1612
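     /*
      * Synthesize the requested event types for the current decoder state,
      * update the thread stack, and drive the sync_switch state machine.
      */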
1613 static int intel_pt_sample(struct intel_pt_queue *ptq)
1614 {
1615         const struct intel_pt_state *state = ptq->state;
1616         struct intel_pt *pt = ptq->pt;
1617         int err;
1618
1619         if (!ptq->have_sample)
1620                 return 0;
1621
1622         ptq->have_sample = false;
1623
1624         if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
1625                 /*
1626                  * Cycle count and instruction count only combine to give a valid
1627                  * IPC ratio when the cycle count has changed.
1628                  */
1629                 ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
1630                 ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
1631         }
1632
1633         if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
1634                 if (state->type & INTEL_PT_CBR_CHG) {
1635                         err = intel_pt_synth_cbr_sample(ptq);
1636                         if (err)
1637                                 return err;
1638                 }
1639                 if (state->type & INTEL_PT_MWAIT_OP) {
1640                         err = intel_pt_synth_mwait_sample(ptq);
1641                         if (err)
1642                                 return err;
1643                 }
1644                 if (state->type & INTEL_PT_PWR_ENTRY) {
1645                         err = intel_pt_synth_pwre_sample(ptq);
1646                         if (err)
1647                                 return err;
1648                 }
1649                 if (state->type & INTEL_PT_EX_STOP) {
1650                         err = intel_pt_synth_exstop_sample(ptq);
1651                         if (err)
1652                                 return err;
1653                 }
1654                 if (state->type & INTEL_PT_PWR_EXIT) {
1655                         err = intel_pt_synth_pwrx_sample(ptq);
1656                         if (err)
1657                                 return err;
1658                 }
1659         }
1660
1661         if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
1662                 err = intel_pt_synth_instruction_sample(ptq);
1663                 if (err)
1664                         return err;
1665         }
1666
1667         if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
1668                 err = intel_pt_synth_transaction_sample(ptq);
1669                 if (err)
1670                         return err;
1671         }
1672
1673         if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
1674                 err = intel_pt_synth_ptwrite_sample(ptq);
1675                 if (err)
1676                         return err;
1677         }
1678
1679         if (!(state->type & INTEL_PT_BRANCH))
1680                 return 0;
1681
1682         if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
1683                 thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
1684                                     state->to_ip, ptq->insn_len,
1685                                     state->trace_nr);
1686         else
1687                 thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
1688
1689         if (pt->sample_branches) {
1690                 err = intel_pt_synth_branch_sample(ptq);
1691                 if (err)
1692                         return err;
1693         }
1694
1695         if (pt->synth_opts.last_branch)
1696                 intel_pt_update_last_branch_rb(ptq);
1697
1698         if (!ptq->sync_switch)
1699                 return 0;
1700
1701         if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1702                 switch (ptq->switch_state) {
1703                 case INTEL_PT_SS_NOT_TRACING:
1704                 case INTEL_PT_SS_UNKNOWN:
1705                 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1706                         err = intel_pt_next_tid(pt, ptq);
1707                         if (err)
1708                                 return err;
1709                         ptq->switch_state = INTEL_PT_SS_TRACING;
1710                         break;
1711                 default:
1712                         ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
1713                         return 1;
1714                 }
1715         } else if (!state->to_ip) {
1716                 ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1717         } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
1718                 ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1719         } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1720                    state->to_ip == pt->ptss_ip &&
1721                    (ptq->flags & PERF_IP_FLAG_CALL)) {
1722                 ptq->switch_state = INTEL_PT_SS_TRACING;
1723         }
1724
1725         return 0;
1726 }
1727
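     /*
      * Find the address of the kernel __switch_to symbol, used to recognize a
      * context switch in the trace, and optionally the address (ptss_ip) of
      * the function that generated the recorded switch events.
      */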
1728 static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
1729 {
1730         struct machine *machine = pt->machine;
1731         struct map *map;
1732         struct symbol *sym, *start;
1733         u64 ip, switch_ip = 0;
1734         const char *ptss;
1735
1736         if (ptss_ip)
1737                 *ptss_ip = 0;
1738
1739         map = machine__kernel_map(machine);
1740         if (!map)
1741                 return 0;
1742
1743         if (map__load(map))
1744                 return 0;
1745
1746         start = dso__first_symbol(map->dso);
1747
1748         for (sym = start; sym; sym = dso__next_symbol(sym)) {
1749                 if (sym->binding == STB_GLOBAL &&
1750                     !strcmp(sym->name, "__switch_to")) {
1751                         ip = map->unmap_ip(map, sym->start);
1752                         if (ip >= map->start && ip < map->end) {
1753                                 switch_ip = ip;
1754                                 break;
1755                         }
1756                 }
1757         }
1758
1759         if (!switch_ip || !ptss_ip)
1760                 return 0;
1761
1762         if (pt->have_sched_switch == 1)
1763                 ptss = "perf_trace_sched_switch";
1764         else
1765                 ptss = "__perf_event_task_sched_out";
1766
1767         for (sym = start; sym; sym = dso__next_symbol(sym)) {
1768                 if (!strcmp(sym->name, ptss)) {
1769                         ip = map->unmap_ip(map, sym->start);
1770                         if (ip >= map->start && ip < map->end) {
1771                                 *ptss_ip = ip;
1772                                 break;
1773                         }
1774                 }
1775         }
1776
1777         return switch_ip;
1778 }
1779
1780 static void intel_pt_enable_sync_switch(struct intel_pt *pt)
1781 {
1782         unsigned int i;
1783
1784         pt->sync_switch = true;
1785
1786         for (i = 0; i < pt->queues.nr_queues; i++) {
1787                 struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1788                 struct intel_pt_queue *ptq = queue->priv;
1789
1790                 if (ptq)
1791                         ptq->sync_switch = true;
1792         }
1793 }
1794
1795 /*
1796  * To filter against time ranges, it is only necessary to look at the next start
1797  * or end time.
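      * The queue's sel_start flag indicates whether sel_timestamp currently
      * holds the start (true) or the end (false) of time range sel_idx.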
1798  */
1799 static bool intel_pt_next_time(struct intel_pt_queue *ptq)
1800 {
1801         struct intel_pt *pt = ptq->pt;
1802
1803         if (ptq->sel_start) {
1804                 /* Next time is an end time */
1805                 ptq->sel_start = false;
1806                 ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
1807                 return true;
1808         } else if (ptq->sel_idx + 1 < pt->range_cnt) {
1809                 /* Next time is a start time */
1810                 ptq->sel_start = true;
1811                 ptq->sel_idx += 1;
1812                 ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
1813                 return true;
1814         }
1815
1816         /* No next time */
1817         return false;
1818 }
1819
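     /*
      * Apply the selected time ranges to the decoding of one queue.  Before a
      * range start time, samples are dropped and the decoder is fast forwarded
      * to the start time.  After the last range end time, decoding is stopped.
      * Returns 0 to continue decoding, 1 to stop, or a negative error code.
      */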
1820 static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
1821 {
1822         int err;
1823
1824         while (1) {
1825                 if (ptq->sel_start) {
1826                         if (ptq->timestamp >= ptq->sel_timestamp) {
1827                                 /* After start time, so consider next time */
1828                                 intel_pt_next_time(ptq);
1829                                 if (!ptq->sel_timestamp) {
1830                                         /* No end time */
1831                                         return 0;
1832                                 }
1833                                 /* Check against end time */
1834                                 continue;
1835                         }
1836                         /* Before start time, so fast forward */
1837                         ptq->have_sample = false;
1838                         if (ptq->sel_timestamp > *ff_timestamp) {
1839                                 if (ptq->sync_switch) {
1840                                         intel_pt_next_tid(ptq->pt, ptq);
1841                                         ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1842                                 }
1843                                 *ff_timestamp = ptq->sel_timestamp;
1844                                 err = intel_pt_fast_forward(ptq->decoder,
1845                                                             ptq->sel_timestamp);
1846                                 if (err)
1847                                         return err;
1848                         }
1849                         return 0;
1850                 } else if (ptq->timestamp > ptq->sel_timestamp) {
1851                         /* After end time, so consider next time */
1852                         if (!intel_pt_next_time(ptq)) {
1853                                 /* No next time range, so stop decoding */
1854                                 ptq->have_sample = false;
1855                                 ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1856                                 return 1;
1857                         }
1858                         /* Check against next start time */
1859                         continue;
1860                 } else {
1861                         /* Before end time */
1862                         return 0;
1863                 }
1864         }
1865 }
1866
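     /*
      * Decode one queue until its timestamp reaches *timestamp.  Returns 0
      * when *timestamp is reached (updating it to the queue's timestamp), a
      * positive value when decoding of the queue is finished, or a negative
      * error code.
      */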
1867 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1868 {
1869         const struct intel_pt_state *state = ptq->state;
1870         struct intel_pt *pt = ptq->pt;
1871         u64 ff_timestamp = 0;
1872         int err;
1873
1874         if (!pt->kernel_start) {
1875                 pt->kernel_start = machine__kernel_start(pt->machine);
1876                 if (pt->per_cpu_mmaps &&
1877                     (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
1878                     !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
1879                     !pt->sampling_mode) {
1880                         pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
1881                         if (pt->switch_ip) {
1882                                 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
1883                                              pt->switch_ip, pt->ptss_ip);
1884                                 intel_pt_enable_sync_switch(pt);
1885                         }
1886                 }
1887         }
1888
1889         intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1890                      ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1891         while (1) {
1892                 err = intel_pt_sample(ptq);
1893                 if (err)
1894                         return err;
1895
1896                 state = intel_pt_decode(ptq->decoder);
1897                 if (state->err) {
1898                         if (state->err == INTEL_PT_ERR_NODATA)
1899                                 return 1;
1900                         if (ptq->sync_switch &&
1901                             state->from_ip >= pt->kernel_start) {
1902                                 ptq->sync_switch = false;
1903                                 intel_pt_next_tid(pt, ptq);
1904                         }
1905                         if (pt->synth_opts.errors) {
1906                                 err = intel_ptq_synth_error(ptq, state);
1907                                 if (err)
1908                                         return err;
1909                         }
1910                         continue;
1911                 }
1912
1913                 ptq->state = state;
1914                 ptq->have_sample = true;
1915                 intel_pt_sample_flags(ptq);
1916
1917                 /* Use estimated TSC upon return to user space */
1918                 if (pt->est_tsc &&
1919                     (state->from_ip >= pt->kernel_start || !state->from_ip) &&
1920                     state->to_ip && state->to_ip < pt->kernel_start) {
1921                         intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1922                                      state->timestamp, state->est_timestamp);
1923                         ptq->timestamp = state->est_timestamp;
1924                 /* Use estimated TSC in unknown switch state */
1925                 } else if (ptq->sync_switch &&
1926                            ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1927                            intel_pt_is_switch_ip(ptq, state->to_ip) &&
1928                            ptq->next_tid == -1) {
1929                         intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1930                                      state->timestamp, state->est_timestamp);
1931                         ptq->timestamp = state->est_timestamp;
1932                 } else if (state->timestamp > ptq->timestamp) {
1933                         ptq->timestamp = state->timestamp;
1934                 }
1935
1936                 if (ptq->sel_timestamp) {
1937                         err = intel_pt_time_filter(ptq, &ff_timestamp);
1938                         if (err)
1939                                 return err;
1940                 }
1941
1942                 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1943                         *timestamp = ptq->timestamp;
1944                         return 0;
1945                 }
1946         }
1947         return 0;
1948 }
1949
1950 static inline int intel_pt_update_queues(struct intel_pt *pt)
1951 {
1952         if (pt->queues.new_data) {
1953                 pt->queues.new_data = false;
1954                 return intel_pt_setup_queues(pt);
1955         }
1956         return 0;
1957 }
1958
1959 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1960 {
1961         unsigned int queue_nr;
1962         u64 ts;
1963         int ret;
1964
1965         while (1) {
1966                 struct auxtrace_queue *queue;
1967                 struct intel_pt_queue *ptq;
1968
1969                 if (!pt->heap.heap_cnt)
1970                         return 0;
1971
1972                 if (pt->heap.heap_array[0].ordinal >= timestamp)
1973                         return 0;
1974
1975                 queue_nr = pt->heap.heap_array[0].queue_nr;
1976                 queue = &pt->queues.queue_array[queue_nr];
1977                 ptq = queue->priv;
1978
1979                 intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1980                              queue_nr, pt->heap.heap_array[0].ordinal,
1981                              timestamp);
1982
1983                 auxtrace_heap__pop(&pt->heap);
1984
1985                 if (pt->heap.heap_cnt) {
1986                         ts = pt->heap.heap_array[0].ordinal + 1;
1987                         if (ts > timestamp)
1988                                 ts = timestamp;
1989                 } else {
1990                         ts = timestamp;
1991                 }
1992
1993                 intel_pt_set_pid_tid_cpu(pt, queue);
1994
1995                 ret = intel_pt_run_decoder(ptq, &ts);
1996
1997                 if (ret < 0) {
1998                         auxtrace_heap__add(&pt->heap, queue_nr, ts);
1999                         return ret;
2000                 }
2001
2002                 if (!ret) {
2003                         ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
2004                         if (ret < 0)
2005                                 return ret;
2006                 } else {
2007                         ptq->on_heap = false;
2008                 }
2009         }
2010
2011         return 0;
2012 }
2013
2014 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
2015                                             u64 time_)
2016 {
2017         struct auxtrace_queues *queues = &pt->queues;
2018         unsigned int i;
2019         u64 ts = 0;
2020
2021         for (i = 0; i < queues->nr_queues; i++) {
2022                 struct auxtrace_queue *queue = &pt->queues.queue_array[i];
2023                 struct intel_pt_queue *ptq = queue->priv;
2024
2025                 if (ptq && (tid == -1 || ptq->tid == tid)) {
2026                         ptq->time = time_;
2027                         intel_pt_set_pid_tid_cpu(pt, queue);
2028                         intel_pt_run_decoder(ptq, &ts);
2029                 }
2030         }
2031         return 0;
2032 }
2033
2034 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
2035 {
2036         return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
2037                                     sample->pid, sample->tid, 0, sample->time);
2038 }
2039
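     /*
      * Map a CPU number to its decode queue.  With per-cpu mmaps the queue
      * number usually matches the CPU number, so try that index first and
      * then scan the remaining queues.
      */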
2040 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
2041 {
2042         unsigned i, j;
2043
2044         if (cpu < 0 || !pt->queues.nr_queues)
2045                 return NULL;
2046
2047         if ((unsigned)cpu >= pt->queues.nr_queues)
2048                 i = pt->queues.nr_queues - 1;
2049         else
2050                 i = cpu;
2051
2052         if (pt->queues.queue_array[i].cpu == cpu)
2053                 return pt->queues.queue_array[i].priv;
2054
2055         for (j = 0; i > 0; j++) {
2056                 if (pt->queues.queue_array[--i].cpu == cpu)
2057                         return pt->queues.queue_array[i].priv;
2058         }
2059
2060         for (; j < pt->queues.nr_queues; j++) {
2061                 if (pt->queues.queue_array[j].cpu == cpu)
2062                         return pt->queues.queue_array[j].priv;
2063         }
2064
2065         return NULL;
2066 }
2067
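     /*
      * Handle a context switch in "sync_switch" mode.  Returns 0 if the tid
      * change is deferred until the switch ip is reached, 1 if the caller
      * should update the current tid itself, or a negative error code.
      */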
2068 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
2069                                 u64 timestamp)
2070 {
2071         struct intel_pt_queue *ptq;
2072         int err;
2073
2074         if (!pt->sync_switch)
2075                 return 1;
2076
2077         ptq = intel_pt_cpu_to_ptq(pt, cpu);
2078         if (!ptq || !ptq->sync_switch)
2079                 return 1;
2080
2081         switch (ptq->switch_state) {
2082         case INTEL_PT_SS_NOT_TRACING:
2083                 break;
2084         case INTEL_PT_SS_UNKNOWN:
2085         case INTEL_PT_SS_TRACING:
2086                 ptq->next_tid = tid;
2087                 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
2088                 return 0;
2089         case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
2090                 if (!ptq->on_heap) {
2091                         ptq->timestamp = perf_time_to_tsc(timestamp,
2092                                                           &pt->tc);
2093                         err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
2094                                                  ptq->timestamp);
2095                         if (err)
2096                                 return err;
2097                         ptq->on_heap = true;
2098                 }
2099                 ptq->switch_state = INTEL_PT_SS_TRACING;
2100                 break;
2101         case INTEL_PT_SS_EXPECTING_SWITCH_IP:
2102                 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
2103                 break;
2104         default:
2105                 break;
2106         }
2107
2108         ptq->next_tid = -1;
2109
2110         return 1;
2111 }
2112
2113 static int intel_pt_process_switch(struct intel_pt *pt,
2114                                    struct perf_sample *sample)
2115 {
2116         struct perf_evsel *evsel;
2117         pid_t tid;
2118         int cpu, ret;
2119
2120         evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
2121         if (evsel != pt->switch_evsel)
2122                 return 0;
2123
2124         tid = perf_evsel__intval(evsel, sample, "next_pid");
2125         cpu = sample->cpu;
2126
2127         intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
2128                      cpu, tid, sample->time, perf_time_to_tsc(sample->time,
2129                      &pt->tc));
2130
2131         ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
2132         if (ret <= 0)
2133                 return ret;
2134
2135         return machine__set_current_tid(pt->machine, cpu, -1, tid);
2136 }
2137
2138 static int intel_pt_context_switch_in(struct intel_pt *pt,
2139                                       struct perf_sample *sample)
2140 {
2141         pid_t pid = sample->pid;
2142         pid_t tid = sample->tid;
2143         int cpu = sample->cpu;
2144
2145         if (pt->sync_switch) {
2146                 struct intel_pt_queue *ptq;
2147
2148                 ptq = intel_pt_cpu_to_ptq(pt, cpu);
2149                 if (ptq && ptq->sync_switch) {
2150                         ptq->next_tid = -1;
2151                         switch (ptq->switch_state) {
2152                         case INTEL_PT_SS_NOT_TRACING:
2153                         case INTEL_PT_SS_UNKNOWN:
2154                         case INTEL_PT_SS_TRACING:
2155                                 break;
2156                         case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
2157                         case INTEL_PT_SS_EXPECTING_SWITCH_IP:
2158                                 ptq->switch_state = INTEL_PT_SS_TRACING;
2159                                 break;
2160                         default:
2161                                 break;
2162                         }
2163                 }
2164         }
2165
2166         /*
2167          * If the current tid has not been updated yet, ensure it is updated
2168          * now that a "switch in" event has occurred.
2169          */
2170         if (machine__get_current_tid(pt->machine, cpu) == tid)
2171                 return 0;
2172
2173         return machine__set_current_tid(pt->machine, cpu, pid, tid);
2174 }
2175
2176 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
2177                                    struct perf_sample *sample)
2178 {
2179         bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2180         pid_t pid, tid;
2181         int cpu, ret;
2182
2183         cpu = sample->cpu;
2184
2185         if (pt->have_sched_switch == 3) {
2186                 if (!out)
2187                         return intel_pt_context_switch_in(pt, sample);
2188                 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
2189                         pr_err("Expecting CPU-wide context switch event\n");
2190                         return -EINVAL;
2191                 }
2192                 pid = event->context_switch.next_prev_pid;
2193                 tid = event->context_switch.next_prev_tid;
2194         } else {
2195                 if (out)
2196                         return 0;
2197                 pid = sample->pid;
2198                 tid = sample->tid;
2199         }
2200
2201         if (tid == -1) {
2202                 pr_err("context_switch event has no tid\n");
2203                 return -EINVAL;
2204         }
2205
2206         intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
2207                      cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
2208                      &pt->tc));
2209
2210         ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
2211         if (ret <= 0)
2212                 return ret;
2213
2214         return machine__set_current_tid(pt->machine, cpu, pid, tid);
2215 }
2216
2217 static int intel_pt_process_itrace_start(struct intel_pt *pt,
2218                                          union perf_event *event,
2219                                          struct perf_sample *sample)
2220 {
2221         if (!pt->per_cpu_mmaps)
2222                 return 0;
2223
2224         intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
2225                      sample->cpu, event->itrace_start.pid,
2226                      event->itrace_start.tid, sample->time,
2227                      perf_time_to_tsc(sample->time, &pt->tc));
2228
2229         return machine__set_current_tid(pt->machine, sample->cpu,
2230                                         event->itrace_start.pid,
2231                                         event->itrace_start.tid);
2232 }
2233
2234 static int intel_pt_process_event(struct perf_session *session,
2235                                   union perf_event *event,
2236                                   struct perf_sample *sample,
2237                                   struct perf_tool *tool)
2238 {
2239         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2240                                            auxtrace);
2241         u64 timestamp;
2242         int err = 0;
2243
2244         if (dump_trace)
2245                 return 0;
2246
2247         if (!tool->ordered_events) {
2248                 pr_err("Intel Processor Trace requires ordered events\n");
2249                 return -EINVAL;
2250         }
2251
2252         if (sample->time && sample->time != (u64)-1)
2253                 timestamp = perf_time_to_tsc(sample->time, &pt->tc);
2254         else
2255                 timestamp = 0;
2256
2257         if (timestamp || pt->timeless_decoding) {
2258                 err = intel_pt_update_queues(pt);
2259                 if (err)
2260                         return err;
2261         }
2262
2263         if (pt->timeless_decoding) {
2264                 if (event->header.type == PERF_RECORD_EXIT) {
2265                         err = intel_pt_process_timeless_queues(pt,
2266                                                                event->fork.tid,
2267                                                                sample->time);
2268                 }
2269         } else if (timestamp) {
2270                 err = intel_pt_process_queues(pt, timestamp);
2271         }
2272         if (err)
2273                 return err;
2274
2275         if (event->header.type == PERF_RECORD_AUX &&
2276             (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
2277             pt->synth_opts.errors) {
2278                 err = intel_pt_lost(pt, sample);
2279                 if (err)
2280                         return err;
2281         }
2282
2283         if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
2284                 err = intel_pt_process_switch(pt, sample);
2285         else if (event->header.type == PERF_RECORD_ITRACE_START)
2286                 err = intel_pt_process_itrace_start(pt, event, sample);
2287         else if (event->header.type == PERF_RECORD_SWITCH ||
2288                  event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
2289                 err = intel_pt_context_switch(pt, event, sample);
2290
2291         intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
2292                      event->header.type, sample->cpu, sample->time, timestamp);
2293         intel_pt_log_event(event);
2294
2295         return err;
2296 }
2297
2298 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
2299 {
2300         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2301                                            auxtrace);
2302         int ret;
2303
2304         if (dump_trace)
2305                 return 0;
2306
2307         if (!tool->ordered_events)
2308                 return -EINVAL;
2309
2310         ret = intel_pt_update_queues(pt);
2311         if (ret < 0)
2312                 return ret;
2313
2314         if (pt->timeless_decoding)
2315                 return intel_pt_process_timeless_queues(pt, -1,
2316                                                         MAX_TIMESTAMP - 1);
2317
2318         return intel_pt_process_queues(pt, MAX_TIMESTAMP);
2319 }
2320
2321 static void intel_pt_free_events(struct perf_session *session)
2322 {
2323         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2324                                            auxtrace);
2325         struct auxtrace_queues *queues = &pt->queues;
2326         unsigned int i;
2327
2328         for (i = 0; i < queues->nr_queues; i++) {
2329                 intel_pt_free_queue(queues->queue_array[i].priv);
2330                 queues->queue_array[i].priv = NULL;
2331         }
2332         intel_pt_log_disable();
2333         auxtrace_queues__free(queues);
2334 }
2335
2336 static void intel_pt_free(struct perf_session *session)
2337 {
2338         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2339                                            auxtrace);
2340
2341         auxtrace_heap__free(&pt->heap);
2342         intel_pt_free_events(session);
2343         session->auxtrace = NULL;
2344         thread__put(pt->unknown_thread);
2345         addr_filters__exit(&pt->filts);
2346         zfree(&pt->filter);
2347         zfree(&pt->time_ranges);
2348         free(pt);
2349 }
2350
2351 static int intel_pt_process_auxtrace_event(struct perf_session *session,
2352                                            union perf_event *event,
2353                                            struct perf_tool *tool __maybe_unused)
2354 {
2355         struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
2356                                            auxtrace);
2357
2358         if (!pt->data_queued) {
2359                 struct auxtrace_buffer *buffer;
2360                 off_t data_offset;
2361                 int fd = perf_data__fd(session->data);
2362                 int err;
2363
2364                 if (perf_data__is_pipe(session->data)) {
2365                         data_offset = 0;
2366                 } else {
2367                         data_offset = lseek(fd, 0, SEEK_CUR);
2368                         if (data_offset == -1)
2369                                 return -errno;
2370                 }
2371
2372                 err = auxtrace_queues__add_event(&pt->queues, session, event,
2373                                                  data_offset, &buffer);
2374                 if (err)
2375                         return err;
2376
2377                 /* Dump here now that we have copied a piped trace out of the pipe */
2378                 if (dump_trace) {
2379                         if (auxtrace_buffer__get_data(buffer, fd)) {
2380                                 intel_pt_dump_event(pt, buffer->data,
2381                                                     buffer->size);
2382                                 auxtrace_buffer__put_data(buffer);
2383                         }
2384                 }
2385         }
2386
2387         return 0;
2388 }
2389
2390 struct intel_pt_synth {
2391         struct perf_tool dummy_tool;
2392         struct perf_session *session;
2393 };
2394
2395 static int intel_pt_event_synth(struct perf_tool *tool,
2396                                 union perf_event *event,
2397                                 struct perf_sample *sample __maybe_unused,
2398                                 struct machine *machine __maybe_unused)
2399 {
2400         struct intel_pt_synth *intel_pt_synth =
2401                         container_of(tool, struct intel_pt_synth, dummy_tool);
2402
2403         return perf_session__deliver_synth_event(intel_pt_synth->session, event,
2404                                                  NULL);
2405 }
2406
2407 static int intel_pt_synth_event(struct perf_session *session, const char *name,
2408                                 struct perf_event_attr *attr, u64 id)
2409 {
2410         struct intel_pt_synth intel_pt_synth;
2411         int err;
2412
2413         pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2414                  name, id, (u64)attr->sample_type);
2415
2416         memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
2417         intel_pt_synth.session = session;
2418
2419         err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
2420                                           &id, intel_pt_event_synth);
2421         if (err)
2422                 pr_err("%s: failed to synthesize '%s' event type\n",
2423                        __func__, name);
2424
2425         return err;
2426 }
2427
2428 static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id,
2429                                     const char *name)
2430 {
2431         struct perf_evsel *evsel;
2432
2433         evlist__for_each_entry(evlist, evsel) {
2434                 if (evsel->id && evsel->id[0] == id) {
2435                         if (evsel->name)
2436                                 zfree(&evsel->name);
2437                         evsel->name = strdup(name);
2438                         break;
2439                 }
2440         }
2441 }
2442
2443 static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt,
2444                                          struct perf_evlist *evlist)
2445 {
2446         struct perf_evsel *evsel;
2447
2448         evlist__for_each_entry(evlist, evsel) {
2449                 if (evsel->attr.type == pt->pmu_type && evsel->ids)
2450                         return evsel;
2451         }
2452
2453         return NULL;
2454 }
2455
2456 static int intel_pt_synth_events(struct intel_pt *pt,
2457                                  struct perf_session *session)
2458 {
2459         struct perf_evlist *evlist = session->evlist;
2460         struct perf_evsel *evsel = intel_pt_evsel(pt, evlist);
2461         struct perf_event_attr attr;
2462         u64 id;
2463         int err;
2464
2465         if (!evsel) {
2466                 pr_debug("There are no selected events with Intel Processor Trace data\n");
2467                 return 0;
2468         }
2469
2470         memset(&attr, 0, sizeof(struct perf_event_attr));
2471         attr.size = sizeof(struct perf_event_attr);
2472         attr.type = PERF_TYPE_HARDWARE;
2473         attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
2474         attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
2475                             PERF_SAMPLE_PERIOD;
2476         if (pt->timeless_decoding)
2477                 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
2478         else
2479                 attr.sample_type |= PERF_SAMPLE_TIME;
2480         if (!pt->per_cpu_mmaps)
2481                 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
2482         attr.exclude_user = evsel->attr.exclude_user;
2483         attr.exclude_kernel = evsel->attr.exclude_kernel;
2484         attr.exclude_hv = evsel->attr.exclude_hv;
2485         attr.exclude_host = evsel->attr.exclude_host;
2486         attr.exclude_guest = evsel->attr.exclude_guest;
2487         attr.sample_id_all = evsel->attr.sample_id_all;
2488         attr.read_format = evsel->attr.read_format;
2489
2490         id = evsel->id[0] + 1000000000;
2491         if (!id)
2492                 id = 1;
2493
2494         if (pt->synth_opts.branches) {
2495                 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
2496                 attr.sample_period = 1;
2497                 attr.sample_type |= PERF_SAMPLE_ADDR;
2498                 err = intel_pt_synth_event(session, "branches", &attr, id);
2499                 if (err)
2500                         return err;
2501                 pt->sample_branches = true;
2502                 pt->branches_sample_type = attr.sample_type;
2503                 pt->branches_id = id;
2504                 id += 1;
2505                 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
2506         }
2507
2508         if (pt->synth_opts.callchain)
2509                 attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
2510         if (pt->synth_opts.last_branch)
2511                 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
2512
2513         if (pt->synth_opts.instructions) {
2514                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2515                 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
2516                         attr.sample_period =
2517                                 intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
2518                 else
2519                         attr.sample_period = pt->synth_opts.period;
2520                 err = intel_pt_synth_event(session, "instructions", &attr, id);
2521                 if (err)
2522                         return err;
2523                 pt->sample_instructions = true;
2524                 pt->instructions_sample_type = attr.sample_type;
2525                 pt->instructions_id = id;
2526                 id += 1;
2527         }
2528
2529         attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
2530         attr.sample_period = 1;
2531
2532         if (pt->synth_opts.transactions) {
2533                 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2534                 err = intel_pt_synth_event(session, "transactions", &attr, id);
2535                 if (err)
2536                         return err;
2537                 pt->sample_transactions = true;
2538                 pt->transactions_sample_type = attr.sample_type;
2539                 pt->transactions_id = id;
2540                 intel_pt_set_event_name(evlist, id, "transactions");
2541                 id += 1;
2542         }
2543
2544         attr.type = PERF_TYPE_SYNTH;
2545         attr.sample_type |= PERF_SAMPLE_RAW;
2546
2547         if (pt->synth_opts.ptwrites) {
2548                 attr.config = PERF_SYNTH_INTEL_PTWRITE;
2549                 err = intel_pt_synth_event(session, "ptwrite", &attr, id);
2550                 if (err)
2551                         return err;
2552                 pt->sample_ptwrites = true;
2553                 pt->ptwrites_sample_type = attr.sample_type;
2554                 pt->ptwrites_id = id;
2555                 intel_pt_set_event_name(evlist, id, "ptwrite");
2556                 id += 1;
2557         }
2558
2559         if (pt->synth_opts.pwr_events) {
2560                 pt->sample_pwr_events = true;
2561                 pt->pwr_events_sample_type = attr.sample_type;
2562
2563                 attr.config = PERF_SYNTH_INTEL_CBR;
2564                 err = intel_pt_synth_event(session, "cbr", &attr, id);
2565                 if (err)
2566                         return err;
2567                 pt->cbr_id = id;
2568                 intel_pt_set_event_name(evlist, id, "cbr");
2569                 id += 1;
2570         }
2571
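             /*
              * MWAIT, PWRE, EXSTOP and PWRX events can only be synthesized if
              * power event tracing (config bit 4, pwr_evt) was enabled when
              * recording.
              */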
2572         if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
2573                 attr.config = PERF_SYNTH_INTEL_MWAIT;
2574                 err = intel_pt_synth_event(session, "mwait", &attr, id);
2575                 if (err)
2576                         return err;
2577                 pt->mwait_id = id;
2578                 intel_pt_set_event_name(evlist, id, "mwait");
2579                 id += 1;
2580
2581                 attr.config = PERF_SYNTH_INTEL_PWRE;
2582                 err = intel_pt_synth_event(session, "pwre", &attr, id);
2583                 if (err)
2584                         return err;
2585                 pt->pwre_id = id;
2586                 intel_pt_set_event_name(evlist, id, "pwre");
2587                 id += 1;
2588
2589                 attr.config = PERF_SYNTH_INTEL_EXSTOP;
2590                 err = intel_pt_synth_event(session, "exstop", &attr, id);
2591                 if (err)
2592                         return err;
2593                 pt->exstop_id = id;
2594                 intel_pt_set_event_name(evlist, id, "exstop");
2595                 id += 1;
2596
2597                 attr.config = PERF_SYNTH_INTEL_PWRX;
2598                 err = intel_pt_synth_event(session, "pwrx", &attr, id);
2599                 if (err)
2600                         return err;
2601                 pt->pwrx_id = id;
2602                 intel_pt_set_event_name(evlist, id, "pwrx");
2603                 id += 1;
2604         }
2605
2606         return 0;
2607 }
2608
2609 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
2610 {
2611         struct perf_evsel *evsel;
2612
2613         evlist__for_each_entry_reverse(evlist, evsel) {
2614                 const char *name = perf_evsel__name(evsel);
2615
2616                 if (!strcmp(name, "sched:sched_switch"))
2617                         return evsel;
2618         }
2619
2620         return NULL;
2621 }
2622
2623 static bool intel_pt_find_switch(struct perf_evlist *evlist)
2624 {
2625         struct perf_evsel *evsel;
2626
2627         evlist__for_each_entry(evlist, evsel) {
2628                 if (evsel->attr.context_switch)
2629                         return true;
2630         }
2631
2632         return false;
2633 }
2634
2635 static int intel_pt_perf_config(const char *var, const char *value, void *data)
2636 {
2637         struct intel_pt *pt = data;
2638
2639         if (!strcmp(var, "intel-pt.mispred-all"))
2640                 pt->mispred_all = perf_config_bool(var, value);
2641
2642         return 0;
2643 }
2644
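     /*
      * perf time and TSC do not convert exactly both ways, so after an
      * initial conversion, step the TSC value until its perf time falls on
      * the wanted side of the boundary.
      */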
2645 /* Find least TSC which converts to ns or later */
2646 static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
2647 {
2648         u64 tsc, tm;
2649
2650         tsc = perf_time_to_tsc(ns, &pt->tc);
2651
2652         while (1) {
2653                 tm = tsc_to_perf_time(tsc, &pt->tc);
2654                 if (tm < ns)
2655                         break;
2656                 tsc -= 1;
2657         }
2658
2659         while (tm < ns)
2660                 tm = tsc_to_perf_time(++tsc, &pt->tc);
2661
2662         return tsc;
2663 }
2664
2665 /* Find greatest TSC which converts to ns or earlier */
2666 static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
2667 {
2668         u64 tsc, tm;
2669
2670         tsc = perf_time_to_tsc(ns, &pt->tc);
2671
2672         while (1) {
2673                 tm = tsc_to_perf_time(tsc, &pt->tc);
2674                 if (tm > ns)
2675                         break;
2676                 tsc += 1;
2677         }
2678
2679         while (tm > ns)
2680                 tm = tsc_to_perf_time(--tsc, &pt->tc);
2681
2682         return tsc;
2683 }
2684
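     /*
      * Convert the selected perf-time ranges into TSC ranges for the decoder.
      * A start or end of zero means that side of the range is open-ended.
      */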
2685 static int intel_pt_setup_time_ranges(struct intel_pt *pt,
2686                                       struct itrace_synth_opts *opts)
2687 {
2688         struct perf_time_interval *p = opts->ptime_range;
2689         int n = opts->range_num;
2690         int i;
2691
2692         if (!n || !p || pt->timeless_decoding)
2693                 return 0;
2694
2695         pt->time_ranges = calloc(n, sizeof(struct range));
2696         if (!pt->time_ranges)
2697                 return -ENOMEM;
2698
2699         pt->range_cnt = n;
2700
2701         intel_pt_log("%s: %u range(s)\n", __func__, n);
2702
2703         for (i = 0; i < n; i++) {
2704                 struct range *r = &pt->time_ranges[i];
2705                 u64 ts = p[i].start;
2706                 u64 te = p[i].end;
2707
2708                 /*
2709                  * Take care to ensure the TSC range matches the perf-time range
2710                  * when converted back to perf-time.
2711                  */
2712                 r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
2713                 r->end   = te ? intel_pt_tsc_end(te, pt) : 0;
2714
2715                 intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
2716                              i, ts, te);
2717                 intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
2718                              i, r->start, r->end);
2719         }
2720
2721         return 0;
2722 }
2723
2724 static const char * const intel_pt_info_fmts[] = {
2725         [INTEL_PT_PMU_TYPE]             = "  PMU Type            %"PRId64"\n",
2726         [INTEL_PT_TIME_SHIFT]           = "  Time Shift          %"PRIu64"\n",
2727         [INTEL_PT_TIME_MULT]            = "  Time Multiplier     %"PRIu64"\n",
2728         [INTEL_PT_TIME_ZERO]            = "  Time Zero           %"PRIu64"\n",
2729         [INTEL_PT_CAP_USER_TIME_ZERO]   = "  Cap Time Zero       %"PRId64"\n",
2730         [INTEL_PT_TSC_BIT]              = "  TSC bit             %#"PRIx64"\n",
2731         [INTEL_PT_NORETCOMP_BIT]        = "  NoRETComp bit       %#"PRIx64"\n",
2732         [INTEL_PT_HAVE_SCHED_SWITCH]    = "  Have sched_switch   %"PRId64"\n",
2733         [INTEL_PT_SNAPSHOT_MODE]        = "  Snapshot mode       %"PRId64"\n",
2734         [INTEL_PT_PER_CPU_MMAPS]        = "  Per-cpu maps        %"PRId64"\n",
2735         [INTEL_PT_MTC_BIT]              = "  MTC bit             %#"PRIx64"\n",
2736         [INTEL_PT_TSC_CTC_N]            = "  TSC:CTC numerator   %"PRIu64"\n",
2737         [INTEL_PT_TSC_CTC_D]            = "  TSC:CTC denominator %"PRIu64"\n",
2738         [INTEL_PT_CYC_BIT]              = "  CYC bit             %#"PRIx64"\n",
2739         [INTEL_PT_MAX_NONTURBO_RATIO]   = "  Max non-turbo ratio %"PRIu64"\n",
2740         [INTEL_PT_FILTER_STR_LEN]       = "  Filter string len.  %"PRIu64"\n",
2741 };
2742
2743 static void intel_pt_print_info(u64 *arr, int start, int finish)
2744 {
2745         int i;
2746
2747         if (!dump_trace)
2748                 return;
2749
2750         for (i = start; i <= finish; i++)
2751                 fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
2752 }
2753
2754 static void intel_pt_print_info_str(const char *name, const char *str)
2755 {
2756         if (!dump_trace)
2757                 return;
2758
2759         fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
2760 }
2761
2762 static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
2763 {
2764         return auxtrace_info->header.size >=
2765                 sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
2766 }
2767
2768 int intel_pt_process_auxtrace_info(union perf_event *event,
2769                                    struct perf_session *session)
2770 {
2771         struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
2772         size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
2773         struct intel_pt *pt;
2774         void *info_end;
2775         u64 *info;
2776         int err;
2777
2778         if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
2779                                         min_sz)
2780                 return -EINVAL;
2781
2782         pt = zalloc(sizeof(struct intel_pt));
2783         if (!pt)
2784                 return -ENOMEM;
2785
2786         addr_filters__init(&pt->filts);
2787
2788         err = perf_config(intel_pt_perf_config, pt);
2789         if (err)
2790                 goto err_free;
2791
2792         err = auxtrace_queues__init(&pt->queues);
2793         if (err)
2794                 goto err_free;
2795
2796         intel_pt_log_set_name(INTEL_PT_PMU_NAME);
2797
2798         pt->session = session;
2799         pt->machine = &session->machines.host; /* No kvm support */
2800         pt->auxtrace_type = auxtrace_info->type;
2801         pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
2802         pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
2803         pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
2804         pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
2805         pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
2806         pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
2807         pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
2808         pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
2809         pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
2810         pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
2811         intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
2812                             INTEL_PT_PER_CPU_MMAPS);
2813
2814         if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
2815                 pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
2816                 pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
2817                 pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
2818                 pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
2819                 pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
2820                 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
2821                                     INTEL_PT_CYC_BIT);
2822         }
2823
2824         if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
2825                 pt->max_non_turbo_ratio =
2826                         auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
2827                 intel_pt_print_info(&auxtrace_info->priv[0],
2828                                     INTEL_PT_MAX_NONTURBO_RATIO,
2829                                     INTEL_PT_MAX_NONTURBO_RATIO);
2830         }
2831
2832         info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
2833         info_end = (void *)info + auxtrace_info->header.size;
2834
2835         if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
2836                 size_t len;
2837
2838                 len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
2839                 intel_pt_print_info(&auxtrace_info->priv[0],
2840                                     INTEL_PT_FILTER_STR_LEN,
2841                                     INTEL_PT_FILTER_STR_LEN);
2842                 if (len) {
2843                         const char *filter = (const char *)info;
2844
2845                         len = roundup(len + 1, 8);
2846                         info += len >> 3;
2847                         if ((void *)info > info_end) {
2848                                 pr_err("%s: bad filter string length\n", __func__);
2849                                 err = -EINVAL;
2850                                 goto err_free_queues;
2851                         }
2852                         pt->filter = memdup(filter, len);
2853                         if (!pt->filter) {
2854                                 err = -ENOMEM;
2855                                 goto err_free_queues;
2856                         }
2857                         if (session->header.needs_swap)
2858                                 mem_bswap_64(pt->filter, len);
2859                         if (pt->filter[len - 1]) {
2860                                 pr_err("%s: filter string not null terminated\n", __func__);
2861                                 err = -EINVAL;
2862                                 goto err_free_queues;
2863                         }
2864                         err = addr_filters__parse_bare_filter(&pt->filts,
2865                                                               filter);
2866                         if (err)
2867                                 goto err_free_queues;
2868                 }
2869                 intel_pt_print_info_str("Filter string", pt->filter);
2870         }
2871
2872         pt->timeless_decoding = intel_pt_timeless_decoding(pt);
2873         if (pt->timeless_decoding && !pt->tc.time_mult)
2874                 pt->tc.time_mult = 1;
2875         pt->have_tsc = intel_pt_have_tsc(pt);
2876         pt->sampling_mode = false;
2877         pt->est_tsc = !pt->timeless_decoding;
2878
2879         pt->unknown_thread = thread__new(999999999, 999999999);
2880         if (!pt->unknown_thread) {
2881                 err = -ENOMEM;
2882                 goto err_free_queues;
2883         }
2884
2885         /*
2886          * Since this thread will not be kept in any rbtree nor in a
2887          * list, initialize its list node so that at thread__put() the
2888          * current thread lifetime assumption is kept and we don't segfault
2889          * at list_del_init().
2890          */
2891         INIT_LIST_HEAD(&pt->unknown_thread->node);
2892
2893         err = thread__set_comm(pt->unknown_thread, "unknown", 0);
2894         if (err)
2895                 goto err_delete_thread;
2896         if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
2897                 err = -ENOMEM;
2898                 goto err_delete_thread;
2899         }
2900
2901         pt->auxtrace.process_event = intel_pt_process_event;
2902         pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
2903         pt->auxtrace.flush_events = intel_pt_flush;
2904         pt->auxtrace.free_events = intel_pt_free_events;
2905         pt->auxtrace.free = intel_pt_free;
2906         session->auxtrace = &pt->auxtrace;
2907
2908         if (dump_trace)
2909                 return 0;
2910
2911         if (pt->have_sched_switch == 1) {
2912                 pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
2913                 if (!pt->switch_evsel) {
2914                         pr_err("%s: missing sched_switch event\n", __func__);
2915                         err = -EINVAL;
2916                         goto err_delete_thread;
2917                 }
2918         } else if (pt->have_sched_switch == 2 &&
2919                    !intel_pt_find_switch(session->evlist)) {
2920                 pr_err("%s: missing context_switch attribute flag\n", __func__);
2921                 err = -EINVAL;
2922                 goto err_delete_thread;
2923         }
2924
2925         if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
2926                 pt->synth_opts = *session->itrace_synth_opts;
2927         } else {
2928                 itrace_synth_opts__set_default(&pt->synth_opts,
2929                                 session->itrace_synth_opts->default_no_sample);
2930                 if (!session->itrace_synth_opts->default_no_sample &&
2931                     !session->itrace_synth_opts->inject) {
2932                         pt->synth_opts.branches = false;
2933                         pt->synth_opts.callchain = true;
2934                 }
2935                 if (session->itrace_synth_opts)
2936                         pt->synth_opts.thread_stack =
2937                                 session->itrace_synth_opts->thread_stack;
2938         }
2939
2940         if (pt->synth_opts.log)
2941                 intel_pt_log_enable();
2942
2943         /* Maximum non-turbo ratio is TSC freq / 100 MHz */
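             /* e.g. a 2.4 GHz TSC gives a max non-turbo ratio of 24 */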
2944         if (pt->tc.time_mult) {
2945                 u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
2946
2947                 if (!pt->max_non_turbo_ratio)
2948                         pt->max_non_turbo_ratio =
2949                                         (tsc_freq + 50000000) / 100000000;
2950                 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
2951                 intel_pt_log("Maximum non-turbo ratio %u\n",
2952                              pt->max_non_turbo_ratio);
2953                 pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
2954         }
2955
2956         if (session->itrace_synth_opts) {
2957                 err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
2958                 if (err)
2959                         goto err_delete_thread;
2960         }
2961
2962         if (pt->synth_opts.calls)
2963                 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
2964                                        PERF_IP_FLAG_TRACE_END;
2965         if (pt->synth_opts.returns)
2966                 pt->branches_filter |= PERF_IP_FLAG_RETURN |
2967                                        PERF_IP_FLAG_TRACE_BEGIN;
2968
2969         if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
2970                 symbol_conf.use_callchain = true;
2971                 if (callchain_register_param(&callchain_param) < 0) {
2972                         symbol_conf.use_callchain = false;
2973                         pt->synth_opts.callchain = false;
2974                 }
2975         }
2976
2977         err = intel_pt_synth_events(pt, session);
2978         if (err)
2979                 goto err_delete_thread;
2980
2981         err = auxtrace_queues__process_index(&pt->queues, session);
2982         if (err)
2983                 goto err_delete_thread;
2984
2985         if (pt->queues.populated)
2986                 pt->data_queued = true;
2987
2988         if (pt->timeless_decoding)
2989                 pr_debug2("Intel PT decoding without timestamps\n");
2990
2991         return 0;
2992
2993 err_delete_thread:
2994         thread__zput(pt->unknown_thread);
2995 err_free_queues:
2996         intel_pt_log_disable();
2997         auxtrace_queues__free(&pt->queues);
2998         session->auxtrace = NULL;
2999 err_free:
3000         addr_filters__exit(&pt->filts);
3001         zfree(&pt->filter);
3002         zfree(&pt->time_ranges);
3003         free(pt);
3004         return err;
3005 }