/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include <byteswap.h>
#include "asm/bug.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
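/*
 * Note (illustrative, inferred from the code below): FD() yields the file
 * descriptor that sys_perf_event_open() returned for a given (cpu, thread)
 * cell of the evsel's fd xyarray; GROUP_FD() looks up the group leader's fd
 * for a cpu, which lives in thread slot 0 of the group_fds xyarray handed
 * to __perf_evsel__open().
 */
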
int __perf_evsel__sample_size(u64 sample_type)
{
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0, i;

	for (i = 0; i < 64; i++) {
		if (mask & (1ULL << i))
			size++;
	}

	return size * sizeof(u64);
}

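/*
 * Worked example (sketch, not from the original file): each bit set in the
 * masked sample_type contributes one u64 to the fixed part of a sample, so
 *
 *	int sz = __perf_evsel__sample_size(PERF_SAMPLE_IP |
 *					   PERF_SAMPLE_TID |
 *					   PERF_SAMPLE_TIME);
 *
 * sets sz to 3 * sizeof(u64) = 24 bytes.
 */
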
void hists__init(struct hists *hists)
{
	memset(hists, 0, sizeof(*hists));
	hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
	hists->entries_in = &hists->entries_in_array[0];
	hists->entries_collapsed = RB_ROOT;
	hists->entries = RB_ROOT;
	pthread_mutex_init(&hists->lock, NULL);
}

void perf_evsel__init(struct perf_evsel *evsel,
		      struct perf_event_attr *attr, int idx)
{
	evsel->idx  = idx;
	evsel->attr = *attr;
	INIT_LIST_HEAD(&evsel->node);
	hists__init(&evsel->hists);
}

struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
	struct perf_evsel *evsel = zalloc(sizeof(*evsel));

	if (evsel != NULL)
		perf_evsel__init(evsel, attr, idx);

	return evsel;
}

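/*
 * Usage sketch (hypothetical caller; the real ones live in
 * builtin-{top,stat,record}.c): create an evsel counting CPU cycles as the
 * first (idx 0) event of a list:
 *
 *	struct perf_event_attr attr = {
 *		.type	= PERF_TYPE_HARDWARE,
 *		.config	= PERF_COUNT_HW_CPU_CYCLES,
 *	};
 *	struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
 *
 *	if (evsel == NULL)
 *		return -ENOMEM;
 */
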
void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts,
			struct perf_evsel *first)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
	attr->inherit	    = !opts->no_inherit;
	attr->read_format   = PERF_FORMAT_TOTAL_TIME_ENABLED |
			      PERF_FORMAT_TOTAL_TIME_RUNNING |
			      PERF_FORMAT_ID;

	attr->sample_type  |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/*
	 * We default some events to a sample period of 1, but keep
	 * it a weak assumption that the user can override.
	 */
	if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
				     opts->user_interval != ULLONG_MAX)) {
		if (opts->freq) {
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= opts->freq;
		} else {
			attr->sample_period = opts->default_interval;
		}
	}

	if (opts->no_samples)
		attr->sample_freq = 0;

	if (opts->inherit_stat)
		attr->inherit_stat = 1;

	if (opts->sample_address) {
		attr->sample_type	|= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (opts->call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

	if (perf_target__has_cpu(&opts->target))
		attr->sample_type	|= PERF_SAMPLE_CPU;

	if (opts->period)
		attr->sample_type	|= PERF_SAMPLE_PERIOD;

	if (!opts->sample_id_all_missing &&
	    (opts->sample_time || !opts->no_inherit ||
	     perf_target__has_cpu(&opts->target)))
		attr->sample_type	|= PERF_SAMPLE_TIME;

	if (opts->raw_samples) {
		attr->sample_type	|= PERF_SAMPLE_TIME;
		attr->sample_type	|= PERF_SAMPLE_RAW;
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}

	if (opts->no_delay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}
	if (opts->branch_stack) {
		attr->sample_type	|= PERF_SAMPLE_BRANCH_STACK;
		attr->branch_sample_type = opts->branch_stack;
	}

	attr->mmap = track;
	attr->comm = track;

	if (perf_target__none(&opts->target) &&
	    (!opts->group || evsel == first)) {
		attr->enable_on_exec = 1;
	}
}

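/*
 * Example (sketch, assuming default record options: opts->freq set, no
 * inherit disabled, no target cpu): the code above leaves attr->sample_type
 * as PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD |
 * PERF_SAMPLE_TIME and sets attr->freq = 1, i.e. each sample records where
 * it happened, which task, how much it counts for and when.
 */
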
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;

	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));

	if (evsel->fd) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			for (thread = 0; thread < nthreads; thread++) {
				FD(evsel, cpu, thread) = -1;
			}
		}
	}

	return evsel->fd != NULL ? 0 : -ENOMEM;
}

int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
	if (evsel->sample_id == NULL)
		return -ENOMEM;

	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
	if (evsel->id == NULL) {
		xyarray__delete(evsel->sample_id);
		evsel->sample_id = NULL;
		return -ENOMEM;
	}

	return 0;
}

int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
	evsel->counts = zalloc((sizeof(*evsel->counts) +
				(ncpus * sizeof(struct perf_counts_values))));
	return evsel->counts != NULL ? 0 : -ENOMEM;
}

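/*
 * Layout note (inferred from the readers below): evsel->counts holds one
 * aggregated perf_counts_values (->aggr) followed by a flexible per-cpu
 * array (->cpu[]), each entry carrying the val/ena/run triple read from
 * the kernel, hence the single zalloc() sized for both parts.
 */
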
void perf_evsel__free_fd(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->fd);
	evsel->fd = NULL;
}

void perf_evsel__free_id(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->sample_id);
	evsel->sample_id = NULL;
	free(evsel->id);
	evsel->id = NULL;
}

void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;

	for (cpu = 0; cpu < ncpus; cpu++)
		for (thread = 0; thread < nthreads; ++thread) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
}

void perf_evsel__exit(struct perf_evsel *evsel)
{
	assert(list_empty(&evsel->node));
	xyarray__delete(evsel->fd);
	xyarray__delete(evsel->sample_id);
	free(evsel->id);
}

void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	close_cgroup(evsel->cgrp);
	free(evsel->name);
	free(evsel);
}

int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
			      int cpu, int thread, bool scale)
{
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;

	if (FD(evsel, cpu, thread) < 0)
		return -EINVAL;

	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
		return -ENOMEM;

	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
		return -errno;

	if (scale) {
		if (count.run == 0)
			count.val = 0;
		else if (count.run < count.ena)
			count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
	} else
		count.ena = count.run = 0;

	evsel->counts->cpu[cpu] = count;
	return 0;
}

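/*
 * Scaling example (sketch): if the event was multiplexed and only ran for
 * half of the time it was enabled, say val = 1000, ena = 2000, run = 1000,
 * the estimate becomes 1000 * 2000 / 1000 + 0.5, i.e. val = 2000.
 */
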
int __perf_evsel__read(struct perf_evsel *evsel,
		       int ncpus, int nthreads, bool scale)
{
	size_t nv = scale ? 3 : 1;
	int cpu, thread;
	struct perf_counts_values *aggr = &evsel->counts->aggr, count;

	aggr->val = aggr->ena = aggr->run = 0;

	for (cpu = 0; cpu < ncpus; cpu++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (FD(evsel, cpu, thread) < 0)
				continue;

			if (readn(FD(evsel, cpu, thread),
				  &count, nv * sizeof(u64)) < 0)
				return -errno;

			aggr->val += count.val;
			if (scale) {
				aggr->ena += count.ena;
				aggr->run += count.run;
			}
		}
	}

	evsel->counts->scaled = 0;
	if (scale) {
		if (aggr->run == 0) {
			evsel->counts->scaled = -1;
			aggr->val = 0;
			return 0;
		}

		if (aggr->run < aggr->ena) {
			evsel->counts->scaled = 1;
			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
		}
	} else
		aggr->ena = aggr->run = 0;

	return 0;
}

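/*
 * Usage sketch (hypothetical caller): aggregate a counter over 4 cpus and
 * 1 thread with scaling, then check whether multiplexing kicked in:
 *
 *	if (__perf_evsel__read(evsel, 4, 1, true) == 0) {
 *		struct perf_counts_values *aggr = &evsel->counts->aggr;
 *
 *		if (evsel->counts->scaled == -1)
 *			pr_debug("event never ran\n");
 *		else
 *			pr_debug("count: %" PRIu64 "\n", aggr->val);
 *	}
 */
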
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
			      struct thread_map *threads, bool group,
			      struct xyarray *group_fds)
{
	int cpu, thread;
	unsigned long flags = 0;
	int pid = -1, err;

	if (evsel->fd == NULL &&
	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
		return -ENOMEM;

	if (evsel->cgrp) {
		flags = PERF_FLAG_PID_CGROUP;
		pid = evsel->cgrp->fd;
	}

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;

		for (thread = 0; thread < threads->nr; thread++) {

			if (!evsel->cgrp)
				pid = threads->map[thread];

			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
								     pid,
								     cpus->map[cpu],
								     group_fd, flags);
			if (FD(evsel, cpu, thread) < 0) {
				err = -errno;
				goto out_close;
			}

			if (group && group_fd == -1)
				group_fd = FD(evsel, cpu, thread);
		}
	}

	return 0;

out_close:
	do {
		while (--thread >= 0) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
		thread = threads->nr;
	} while (--cpu >= 0);
	return err;
}

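/*
 * Grouping note (inferred from the loop above): when 'group' is set and no
 * leader fd was handed in via group_fds, the first fd opened on a cpu
 * becomes the leader and is passed as group_fd to the remaining opens on
 * that cpu, so the kernel schedules the events on and off the PMU as one
 * unit.
 */
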
void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	if (evsel->fd == NULL)
		return;

	perf_evsel__close_fd(evsel, ncpus, nthreads);
	perf_evsel__free_fd(evsel);
	evsel->fd = NULL;
}

static struct {
	struct cpu_map map;
	int cpus[1];
} empty_cpu_map = {
	.map.nr	= 1,
	.cpus	= { -1, },
};

static struct {
	struct thread_map map;
	int threads[1];
} empty_thread_map = {
	.map.nr	 = 1,
	.threads = { -1, },
};

int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
		     struct thread_map *threads, bool group,
		     struct xyarray *group_fd)
{
	if (cpus == NULL) {
		/* Work around old compiler warnings about strict aliasing */
		cpus = &empty_cpu_map.map;
	}

	if (threads == NULL)
		threads = &empty_thread_map.map;

	return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
}

int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus, bool group,
			     struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
				  group_fd);
}

int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads, bool group,
				struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
				  group_fd);
}

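/*
 * Usage sketch (hypothetical caller, no error handling shown): profile a
 * single existing thread, ungrouped, on any cpu:
 *
 *	struct thread_map *threads = thread_map__new(-1, tid);
 *
 *	if (perf_evsel__open_per_thread(evsel, threads, false, NULL) < 0)
 *		die("cannot open counter");
 */
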
static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
				       struct perf_sample *sample)
{
	const u64 *array = event->sample.array;

	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_CPU) {
		u32 *p = (u32 *)array;
		sample->cpu = *p;
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
		u32 *p = (u32 *)array;
		sample->pid = p[0];
		sample->tid = p[1];
	}

	return 0;
}

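/*
 * Layout note: with attr->sample_id_all set, non-sample records carry a
 * trailer of the selected id fields, in this order:
 *
 *	{ u32 pid, tid;  }	if PERF_SAMPLE_TID
 *	{ u64 time;      }	if PERF_SAMPLE_TIME
 *	{ u64 id;        }	if PERF_SAMPLE_ID
 *	{ u64 stream_id; }	if PERF_SAMPLE_STREAM_ID
 *	{ u32 cpu, res;  }	if PERF_SAMPLE_CPU
 *
 * which is why the function above starts at the last u64 of the record and
 * walks backwards.
 */
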
static bool sample_overlap(const union perf_event *event,
			   const void *offset, u64 size)
{
	const void *base = event;

	if (offset + size > base + event->header.size)
		return true;

	return false;
}

int perf_event__parse_sample(const union perf_event *event, u64 type,
			     int sample_size, bool sample_id_all,
			     struct perf_sample *data, bool swapped)
{
	const u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union u64_swap u;

	memset(data, 0, sizeof(*data));
	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;
	data->period = 1;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!sample_id_all)
			return 0;
		return perf_event__parse_id_sample(event, type, data);
	}

	array = event->sample.array;

	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;

	if (type & PERF_SAMPLE_IP) {
		data->ip = event->ip.ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		data->cpu = u.val32[0];
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}

	if (type & PERF_SAMPLE_READ) {
		fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
		return -1;
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		if (sample_overlap(event, array, sizeof(data->callchain->nr)))
			return -EFAULT;

		data->callchain = (struct ip_callchain *)array;

		if (sample_overlap(event, array, data->callchain->nr))
			return -EFAULT;

		array += 1 + data->callchain->nr;
	}

	if (type & PERF_SAMPLE_RAW) {
		const u64 *pdata;

		u.val64 = *array;
		if (WARN_ONCE(swapped,
			      "Endianness of raw data not corrected!\n")) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		if (sample_overlap(event, array, sizeof(u32)))
			return -EFAULT;

		data->raw_size = u.val32[0];
		pdata = (void *) array + sizeof(u32);

		if (sample_overlap(event, pdata, data->raw_size))
			return -EFAULT;

		data->raw_data = (void *) pdata;

		array = (void *)array + data->raw_size + sizeof(u32);
	}

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		u64 sz;

		data->branch_stack = (struct branch_stack *)array;
		array++; /* the nr field itself */

		sz = data->branch_stack->nr * sizeof(struct branch_entry);
		sz /= sizeof(u64);
		array += sz;
	}

	return 0;
}

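/*
 * Sizing example (sketch) for the PERF_SAMPLE_BRANCH_STACK block above:
 * struct branch_entry is from/to/flags, i.e. 3 u64s (24 bytes). For
 * data->branch_stack->nr == 4 the payload is 4 * 24 = 96 bytes = 12 u64s,
 * so array advances by 1 word (the nr field) plus 12 words.
 */
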
int perf_event__synthesize_sample(union perf_event *event, u64 type,
				  const struct perf_sample *sample,
				  bool swapped)
{
	u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union u64_swap u;

	array = event->sample.array;

	if (type & PERF_SAMPLE_IP) {
		event->ip.ip = sample->ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val32[0] = sample->pid;
		u.val32[1] = sample->tid;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_event__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
			u.val64 = bswap_64(u.val64);
		}

		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		*array = sample->time;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		*array = sample->addr;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		*array = sample->id;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		*array = sample->stream_id;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val32[0] = sample->cpu;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_event__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val64 = bswap_64(u.val64);
		}
		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		*array = sample->period;
		array++;
	}

	return 0;
}

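/*
 * Round-trip note: this function is the write-side twin of
 * perf_event__parse_sample() above; for a given type mask both walk the
 * sample array in the same field order, so synthesizing from a parsed
 * perf_sample reproduces the original fixed-size payload (the parts not
 * handled here, such as raw data and callchains, are left out).
 */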