perf report: Show the IP only in --verbose mode
[linux-block.git] / Documentation / perf_counter / builtin-stat.c
CommitLineData
ddcacfa0 1/*
5242519b 2 * perf stat: /usr/bin/time -alike performance counter statistics utility
ddcacfa0
IM
3
4 It summarizes the counter events of all tasks (and child tasks),
5 covering all CPUs that the command (or workload) executes on.
6 It only counts the per-task events of the workload started,
7 independent of how many other tasks run on those CPUs.
8
9 Sample output:
10
5242519b 11 $ perf stat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
ddcacfa0
IM
12
13 Performance counter stats for 'ls':
14
15 163516953 instructions
16 2295 cache-misses
17 2855182 branch-misses
5242519b
IM
18 *
19 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
20 *
21 * Improvements and fixes by:
22 *
23 * Arjan van de Ven <arjan@linux.intel.com>
24 * Yanmin Zhang <yanmin.zhang@intel.com>
25 * Wu Fengguang <fengguang.wu@intel.com>
26 * Mike Galbraith <efault@gmx.de>
27 * Paul Mackerras <paulus@samba.org>
28 *
29 * Released under the GPL v2. (and only v2, not any later version)
ddcacfa0
IM
30 */
31
1a482f38 32#include "perf.h"
148be2c1 33#include "util/util.h"
5242519b
IM
34#include "util/parse-options.h"
35#include "util/parse-events.h"
ddcacfa0 36
ddcacfa0 37#include <sys/prctl.h>
16c8a109 38
/* Bound to -a/--all-cpus: when set, counters are opened per CPU rather than per task. */
static int system_wide;
/* Bound to -i/--inherit: copied into hw_event.inherit for per-task counters. */
static int inherit = 1;
ddcacfa0 41
5242519b 42static __u64 default_event_id[MAX_COUNTERS] = {
ddcacfa0
IM
43 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
44 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
45 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
46 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
47
48 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
49 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
50 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
51 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
52};
5242519b 53
ddcacfa0
IM
54static int default_interval = 100000;
55static int event_count[MAX_COUNTERS];
56static int fd[MAX_NR_CPUS][MAX_COUNTERS];
57
5242519b 58static int target_pid = -1;
ddcacfa0 59static int nr_cpus = 0;
ddcacfa0
IM
60static unsigned int page_size;
61
66cf7829 62static int scale = 1;
ddcacfa0
IM
63
/*
 * Table of six default count values.
 * NOTE(review): nothing in the visible part of this file reads it.
 */
static const unsigned int default_count[] = {
	1000000,
	1000000,
	  10000,
	  10000,
	1000000,
	  10000,
};
72
ddcacfa0
IM
73static void create_perfstat_counter(int counter)
74{
75 struct perf_counter_hw_event hw_event;
76
77 memset(&hw_event, 0, sizeof(hw_event));
78 hw_event.config = event_id[counter];
79 hw_event.record_type = 0;
5242519b 80 hw_event.nmi = 1;
16c8a109
PZ
81 hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
82 hw_event.exclude_user = event_mask[counter] & EVENT_MASK_USER;
83
ddcacfa0
IM
84 if (scale)
85 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
86 PERF_FORMAT_TOTAL_TIME_RUNNING;
87
88 if (system_wide) {
89 int cpu;
90 for (cpu = 0; cpu < nr_cpus; cpu ++) {
91 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
92 if (fd[cpu][counter] < 0) {
93 printf("perfstat error: syscall returned with %d (%s)\n",
94 fd[cpu][counter], strerror(errno));
95 exit(-1);
96 }
97 }
98 } else {
5242519b 99 hw_event.inherit = inherit;
ddcacfa0
IM
100 hw_event.disabled = 1;
101
102 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
103 if (fd[0][counter] < 0) {
104 printf("perfstat error: syscall returned with %d (%s)\n",
105 fd[0][counter], strerror(errno));
106 exit(-1);
107 }
108 }
109}
110
5242519b 111int do_perfstat(int argc, const char **argv)
ddcacfa0
IM
112{
113 unsigned long long t0, t1;
114 int counter;
115 ssize_t res;
116 int status;
117 int pid;
118
119 if (!system_wide)
120 nr_cpus = 1;
121
122 for (counter = 0; counter < nr_counters; counter++)
123 create_perfstat_counter(counter);
124
ddcacfa0
IM
125 /*
126 * Enable counters and exec the command:
127 */
128 t0 = rdclock();
129 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
130
131 if ((pid = fork()) < 0)
132 perror("failed to fork");
133 if (!pid) {
5242519b 134 if (execvp(argv[0], (char **)argv)) {
ddcacfa0
IM
135 perror(argv[0]);
136 exit(-1);
137 }
138 }
139 while (wait(&status) >= 0)
140 ;
141 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
142 t1 = rdclock();
143
144 fflush(stdout);
145
146 fprintf(stderr, "\n");
147 fprintf(stderr, " Performance counter stats for \'%s\':\n",
148 argv[0]);
149 fprintf(stderr, "\n");
150
151 for (counter = 0; counter < nr_counters; counter++) {
152 int cpu, nv;
153 __u64 count[3], single_count[3];
154 int scaled;
155
156 count[0] = count[1] = count[2] = 0;
157 nv = scale ? 3 : 1;
158 for (cpu = 0; cpu < nr_cpus; cpu ++) {
159 res = read(fd[cpu][counter],
160 single_count, nv * sizeof(__u64));
161 assert(res == nv * sizeof(__u64));
162
163 count[0] += single_count[0];
164 if (scale) {
165 count[1] += single_count[1];
166 count[2] += single_count[2];
167 }
168 }
169
170 scaled = 0;
171 if (scale) {
172 if (count[2] == 0) {
173 fprintf(stderr, " %14s %-20s\n",
174 "<not counted>", event_name(counter));
175 continue;
176 }
177 if (count[2] < count[1]) {
178 scaled = 1;
179 count[0] = (unsigned long long)
180 ((double)count[0] * count[1] / count[2] + 0.5);
181 }
182 }
183
184 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
185 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
186
187 double msecs = (double)count[0] / 1000000;
188
189 fprintf(stderr, " %14.6f %-20s (msecs)",
190 msecs, event_name(counter));
191 } else {
192 fprintf(stderr, " %14Ld %-20s (events)",
193 count[0], event_name(counter));
194 }
195 if (scaled)
196 fprintf(stderr, " (scaled from %.2f%%)",
197 (double) count[2] / count[1] * 100);
198 fprintf(stderr, "\n");
199 }
200 fprintf(stderr, "\n");
201 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
202 (double)(t1-t0)/1e6);
203 fprintf(stderr, "\n");
204
205 return 0;
206}
207
/*
 * Signal handler that intentionally does nothing. cmd_stat() installs
 * it so that the signal is delivered without terminating this process.
 */
static void skip_signal(int signo)
{
	(void)signo;
}
211
/* NULL-terminated usage lines passed to parse_options() and usage_with_options(). */
static const char * const stat_usage[] = {
	"perf stat [<options>] <command>",
	NULL
};
216
217static char events_help_msg[EVENTS_HELP_MAX];
218
219static const struct option options[] = {
220 OPT_CALLBACK('e', "event", NULL, "event",
221 events_help_msg, parse_events),
222 OPT_INTEGER('c', "count", &default_interval,
223 "event period to sample"),
224 OPT_BOOLEAN('i', "inherit", &inherit,
225 "child tasks inherit counters"),
226 OPT_INTEGER('p', "pid", &target_pid,
227 "stat events on existing pid"),
228 OPT_BOOLEAN('a', "all-cpus", &system_wide,
229 "system-wide collection from all CPUs"),
230 OPT_BOOLEAN('l', "scale", &scale,
231 "scale/normalize counters"),
232 OPT_END()
233};
234
235int cmd_stat(int argc, const char **argv, const char *prefix)
236{
237 int counter;
238
239 page_size = sysconf(_SC_PAGE_SIZE);
240
241 create_events_help(events_help_msg);
242 memcpy(event_id, default_event_id, sizeof(default_event_id));
243
244 argc = parse_options(argc, argv, options, stat_usage, 0);
245 if (!argc)
246 usage_with_options(stat_usage, options);
ddcacfa0
IM
247
248 if (!nr_counters) {
249 nr_counters = 8;
250 }
251
252 for (counter = 0; counter < nr_counters; counter++) {
253 if (event_count[counter])
254 continue;
255
256 event_count[counter] = default_interval;
257 }
ddcacfa0
IM
258 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
259 assert(nr_cpus <= MAX_NR_CPUS);
260 assert(nr_cpus >= 0);
261
58d7e993
IM
262 /*
263 * We dont want to block the signals - that would cause
264 * child tasks to inherit that and Ctrl-C would not work.
265 * What we want is for Ctrl-C to work in the exec()-ed
266 * task, but being ignored by perf stat itself:
267 */
268 signal(SIGINT, skip_signal);
269 signal(SIGALRM, skip_signal);
270 signal(SIGABRT, skip_signal);
271
ddcacfa0
IM
272 return do_perfstat(argc, argv);
273}