Merge remote-tracking branches 'asoc/topic/sunxi', 'asoc/topic/topology' and 'asoc...
[linux-2.6-block.git] / tools / perf / builtin-record.c
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9
10 #include "perf.h"
11
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29 #include "util/data.h"
30 #include "util/perf_regs.h"
31 #include "util/auxtrace.h"
32 #include "util/parse-branch-options.h"
33 #include "util/parse-regs-options.h"
34 #include "util/llvm-utils.h"
35
36 #include <unistd.h>
37 #include <sched.h>
38 #include <sys/mman.h>
39
40
/*
 * Per-invocation state of 'perf record'.  Embeds the perf_tool callback
 * table so event-processing callbacks can recover it via container_of().
 */
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;	/* payload bytes written to the output so far */
	struct perf_data_file	file;		/* the perf.data output file */
	struct auxtrace_record	*itr;		/* AUX area tracing state, NULL when unused */
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;	/* SCHED_FIFO priority; 0 = leave scheduling alone */
	bool			no_buildid;	/* skip build-id processing entirely */
	bool			no_buildid_cache;
	bool			buildid_all;	/* mark all DSOs, not just the ones with hits */
	unsigned long long	samples;	/* events seen since last reset (drives poll logic) */
};
56
57 static int record__write(struct record *rec, void *bf, size_t size)
58 {
59         if (perf_data_file__write(rec->session->file, bf, size) < 0) {
60                 pr_err("failed to write perf data, error: %m\n");
61                 return -1;
62         }
63
64         rec->bytes_written += size;
65         return 0;
66 }
67
68 static int process_synthesized_event(struct perf_tool *tool,
69                                      union perf_event *event,
70                                      struct perf_sample *sample __maybe_unused,
71                                      struct machine *machine __maybe_unused)
72 {
73         struct record *rec = container_of(tool, struct record, tool);
74         return record__write(rec, event, event->header.size);
75 }
76
/*
 * Drain one event mmap ring buffer (index @idx) into the output file.
 *
 * The kernel produces at 'head'; we consume from md->prev ('old').  The
 * ring is power-of-two sized (md->mask), so the unread region may wrap
 * past the end of the mapping and need two writes.
 * Returns 0 on success, -1 on write failure.
 */
static int record__mmap_read(struct record *rec, int idx)
{
	struct perf_mmap *md = &rec->evlist->mmap[idx];
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;	/* data area starts after the control page */
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;	/* nothing new to consume */

	rec->samples++;

	size = head - old;

	/* Wrapped: first flush the chunk up to the end of the ring. */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* Then the (remaining) chunk from the start of the ring up to head. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	/* Publish the new consumer position only after successful writes. */
	md->prev = old;
	perf_evlist__mmap_consume(rec->evlist, idx);
out:
	return rc;
}
119
/* Signal-driven state shared between handlers and the main record loop. */
static volatile int done;			/* leave the main loop (signal or workload exit) */
static volatile int signr = -1;			/* terminating signal to re-raise at exit, -1 = none */
static volatile int child_finished;		/* SIGCHLD seen: forked workload has exited */
static volatile int auxtrace_snapshot_enabled;	/* SIGUSR2 may legitimately request a snapshot now */
static volatile int auxtrace_snapshot_err;	/* sticky error from the last snapshot attempt */
static volatile int auxtrace_record__snapshot_started;	/* snapshot pending; main loop must read it */
126
127 static void sig_handler(int sig)
128 {
129         if (sig == SIGCHLD)
130                 child_finished = 1;
131         else
132                 signr = sig;
133
134         done = 1;
135 }
136
137 static void record__sig_exit(void)
138 {
139         if (signr == -1)
140                 return;
141
142         signal(signr, SIG_DFL);
143         raise(signr);
144 }
145
146 #ifdef HAVE_AUXTRACE_SUPPORT
147
148 static int record__process_auxtrace(struct perf_tool *tool,
149                                     union perf_event *event, void *data1,
150                                     size_t len1, void *data2, size_t len2)
151 {
152         struct record *rec = container_of(tool, struct record, tool);
153         struct perf_data_file *file = &rec->file;
154         size_t padding;
155         u8 pad[8] = {0};
156
157         if (!perf_data_file__is_pipe(file)) {
158                 off_t file_offset;
159                 int fd = perf_data_file__fd(file);
160                 int err;
161
162                 file_offset = lseek(fd, 0, SEEK_CUR);
163                 if (file_offset == -1)
164                         return -1;
165                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
166                                                      event, file_offset);
167                 if (err)
168                         return err;
169         }
170
171         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
172         padding = (len1 + len2) & 7;
173         if (padding)
174                 padding = 8 - padding;
175
176         record__write(rec, event, event->header.size);
177         record__write(rec, data1, len1);
178         if (len2)
179                 record__write(rec, data2, len2);
180         record__write(rec, &pad, padding);
181
182         return 0;
183 }
184
185 static int record__auxtrace_mmap_read(struct record *rec,
186                                       struct auxtrace_mmap *mm)
187 {
188         int ret;
189
190         ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
191                                   record__process_auxtrace);
192         if (ret < 0)
193                 return ret;
194
195         if (ret)
196                 rec->samples++;
197
198         return 0;
199 }
200
201 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
202                                                struct auxtrace_mmap *mm)
203 {
204         int ret;
205
206         ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
207                                            record__process_auxtrace,
208                                            rec->opts.auxtrace_snapshot_size);
209         if (ret < 0)
210                 return ret;
211
212         if (ret)
213                 rec->samples++;
214
215         return 0;
216 }
217
218 static int record__auxtrace_read_snapshot_all(struct record *rec)
219 {
220         int i;
221         int rc = 0;
222
223         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
224                 struct auxtrace_mmap *mm =
225                                 &rec->evlist->mmap[i].auxtrace_mmap;
226
227                 if (!mm->base)
228                         continue;
229
230                 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
231                         rc = -1;
232                         goto out;
233                 }
234         }
235 out:
236         return rc;
237 }
238
/*
 * Read all AUX areas and finalize the snapshot.  Runs from the main loop
 * in response to SIGUSR2; results are reported via the
 * auxtrace_snapshot_err / auxtrace_snapshot_enabled globals rather than a
 * return value.
 */
static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		auxtrace_snapshot_err = -1;
	} else {
		auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
		if (!auxtrace_snapshot_err)
			auxtrace_snapshot_enabled = 1;	/* re-arm SIGUSR2 snapshots */
	}
}
250
251 #else
252
/* No-op stubs used when perf is built without AUX area tracing support. */
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}
270
271 #endif
272
/*
 * Open all events in the evlist, apply any event filters and mmap the
 * ring buffers.  On success the session is pointed at the evlist and its
 * id header size is set.  Returns 0 or a negative error.
 */
static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	evlist__for_each(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			/* e.g. fall back from a hardware to a software event */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;	/* capture errno before further calls clobber it */
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	/* On failure, 'pos' is set to the evsel whose filter was rejected. */
	if (perf_evlist__apply_filters(evlist, &pos)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			strerror_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				strerror_r(errno, msg, sizeof(msg)));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
333
334 static int process_sample_event(struct perf_tool *tool,
335                                 union perf_event *event,
336                                 struct perf_sample *sample,
337                                 struct perf_evsel *evsel,
338                                 struct machine *machine)
339 {
340         struct record *rec = container_of(tool, struct record, tool);
341
342         rec->samples++;
343
344         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
345 }
346
347 static int process_buildids(struct record *rec)
348 {
349         struct perf_data_file *file  = &rec->file;
350         struct perf_session *session = rec->session;
351
352         if (file->size == 0)
353                 return 0;
354
355         /*
356          * During this process, it'll load kernel map and replace the
357          * dso->long_name to a real pathname it found.  In this case
358          * we prefer the vmlinux path like
359          *   /lib/modules/3.16.4/build/vmlinux
360          *
361          * rather than build-id path (in debug directory).
362          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
363          */
364         symbol_conf.ignore_vmlinux_buildid = true;
365
366         /*
367          * If --buildid-all is given, it marks all DSO regardless of hits,
368          * so no need to process samples.
369          */
370         if (rec->buildid_all)
371                 rec->tool.sample = NULL;
372
373         return perf_session__process_events(session);
374 }
375
/*
 * Synthesize module and kernel mmap events for one guest machine.
 * Called via machines__process_guests() with @data = the perf_tool.
 * Errors are reported but not propagated (best effort per guest).
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
404
/*
 * Synthetic marker appended after each pass over the ring buffers that
 * produced data; lets report-time code flush its reordering queue at
 * round boundaries.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
409
410 static int record__mmap_read_all(struct record *rec)
411 {
412         u64 bytes_written = rec->bytes_written;
413         int i;
414         int rc = 0;
415
416         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
417                 struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
418
419                 if (rec->evlist->mmap[i].base) {
420                         if (record__mmap_read(rec, i) != 0) {
421                                 rc = -1;
422                                 goto out;
423                         }
424                 }
425
426                 if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
427                     record__auxtrace_mmap_read(rec, mm) != 0) {
428                         rc = -1;
429                         goto out;
430                 }
431         }
432
433         /*
434          * Mark the round finished in case we wrote
435          * at least one event.
436          */
437         if (bytes_written != rec->bytes_written)
438                 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
439
440 out:
441         return rc;
442 }
443
/*
 * Initialize the perf.data header feature bits: start with everything
 * enabled, then clear the features this run cannot or should not use.
 */
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	/* HEADER_STAT is only meaningful for 'perf stat record'. */
	perf_header__clear_feat(&session->header, HEADER_STAT);
}
466
/* errno reported by the workload when its exec() failed (via SIGUSR1). */
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* The child smuggles its exec() errno in the signal's sival_int. */
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
482
483 static void snapshot_sig_handler(int sig);
484
/*
 * Top-level record loop: set up the session and signal handling, fork
 * the workload (if argv was given), synthesize the initial metadata
 * events, then drain the mmap ring buffers until the workload exits or
 * the user interrupts.  Returns the workload exit status or a negative
 * error.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	/* SIGUSR2 requests an AUX snapshot, but only in snapshot mode. */
	if (rec->opts.auxtrace_snapshot_mode)
		signal(SIGUSR2, snapshot_sig_handler);
	else
		signal(SIGUSR2, SIG_IGN);

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	/* Fork the workload stopped; perf_evlist__start_workload() kicks it off. */
	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/* Write the (preliminary) header; it is rewritten with final sizes later. */
	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	/* Pipe output has no seekable header, so stream metadata inline. */
	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_child;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_child;
			}
			rec->bytes_written += err;
		}
	}

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		/*
		 * NOTE(review): this jumps to out_delete_session, skipping the
		 * out_child cleanup that reaps a forked workload - confirm
		 * whether out_child was intended here.
		 */
		if (err)
			goto out_delete_session;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
	if (err != 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		perf_event__synthesize_comm(tool, event,
					    rec->evlist->workload.pid,
					    process_synthesized_event,
					    machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

	auxtrace_snapshot_enabled = 1;
	for (;;) {
		/* Snapshot the sample count to detect whether this pass produced data. */
		unsigned long long hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			auxtrace_snapshot_enabled = 0;
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!auxtrace_snapshot_err)
				record__read_auxtrace_snapshot(rec);
			if (auxtrace_snapshot_err) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		/* No new data this pass: exit if done/drained, otherwise block in poll. */
		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			auxtrace_snapshot_enabled = 0;
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	auxtrace_snapshot_enabled = 0;

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err && !file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;
		file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

		if (!rec->no_buildid) {
			process_buildids(rec);

			if (rec->buildid_all)
				dsos__hit_all(rec->session);
		}
		/* Rewrite the header now that sizes and features are final. */
		perf_session__write_header(rec->session, rec->evlist, fd, true);
	}

	if (!err && !quiet) {
		char samples[128];

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}
792
793 static void callchain_debug(void)
794 {
795         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
796
797         pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
798
799         if (callchain_param.record_mode == CALLCHAIN_DWARF)
800                 pr_debug("callchain: stack dump size %d\n",
801                          callchain_param.dump_size);
802 }
803
804 int record_parse_callchain_opt(const struct option *opt,
805                                const char *arg,
806                                int unset)
807 {
808         int ret;
809         struct record_opts *record = (struct record_opts *)opt->value;
810
811         record->callgraph_set = true;
812         callchain_param.enabled = !unset;
813
814         /* --no-call-graph */
815         if (unset) {
816                 callchain_param.record_mode = CALLCHAIN_NONE;
817                 pr_debug("callchain: disabled\n");
818                 return 0;
819         }
820
821         ret = parse_callchain_record_opt(arg, &callchain_param);
822         if (!ret) {
823                 /* Enable data address sampling for DWARF unwind. */
824                 if (callchain_param.record_mode == CALLCHAIN_DWARF)
825                         record->sample_address = true;
826                 callchain_debug();
827         }
828
829         return ret;
830 }
831
832 int record_callchain_opt(const struct option *opt,
833                          const char *arg __maybe_unused,
834                          int unset __maybe_unused)
835 {
836         struct record_opts *record = (struct record_opts *)opt->value;
837
838         record->callgraph_set = true;
839         callchain_param.enabled = true;
840
841         if (callchain_param.record_mode == CALLCHAIN_NONE)
842                 callchain_param.record_mode = CALLCHAIN_FP;
843
844         callchain_debug();
845         return 0;
846 }
847
848 static int perf_record_config(const char *var, const char *value, void *cb)
849 {
850         struct record *rec = cb;
851
852         if (!strcmp(var, "record.build-id")) {
853                 if (!strcmp(value, "cache"))
854                         rec->no_buildid_cache = false;
855                 else if (!strcmp(value, "no-cache"))
856                         rec->no_buildid_cache = true;
857                 else if (!strcmp(value, "skip"))
858                         rec->no_buildid = true;
859                 else
860                         return -1;
861                 return 0;
862         }
863         if (!strcmp(var, "record.call-graph"))
864                 var = "call-graph.record-mode"; /* fall-through */
865
866         return perf_default_config(var, value, cb);
867 }
868
/* One entry of the --clockid name -> clockid number lookup table. */
struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* Table terminator: a NULL name ends the iteration in parse_clockid(). */
#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 * Values match the Linux UAPI clockid numbers, so defining them here is
 * safe when old libc headers lack them.
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

/* Accepted --clockid names (case-insensitive, optional CLOCK_ prefix). */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
911
912 static int parse_clockid(const struct option *opt, const char *str, int unset)
913 {
914         struct record_opts *opts = (struct record_opts *)opt->value;
915         const struct clockid_map *cm;
916         const char *ostr = str;
917
918         if (unset) {
919                 opts->use_clockid = 0;
920                 return 0;
921         }
922
923         /* no arg passed */
924         if (!str)
925                 return 0;
926
927         /* no setting it twice */
928         if (opts->use_clockid)
929                 return -1;
930
931         opts->use_clockid = true;
932
933         /* if its a number, we're done */
934         if (sscanf(str, "%d", &opts->clockid) == 1)
935                 return 0;
936
937         /* allow a "CLOCK_" prefix to the name */
938         if (!strncasecmp(str, "CLOCK_", 6))
939                 str += 6;
940
941         for (cm = clockids; cm->name; cm++) {
942                 if (!strcasecmp(str, cm->name)) {
943                         opts->clockid = cm->clockid;
944                         return 0;
945                 }
946         }
947
948         opts->use_clockid = false;
949         ui__warning("unknown clockid %s, check man page\n", ostr);
950         return -1;
951 }
952
953 static int record__parse_mmap_pages(const struct option *opt,
954                                     const char *str,
955                                     int unset __maybe_unused)
956 {
957         struct record_opts *opts = opt->value;
958         char *s, *p;
959         unsigned int mmap_pages;
960         int ret;
961
962         if (!str)
963                 return -EINVAL;
964
965         s = strdup(str);
966         if (!s)
967                 return -ENOMEM;
968
969         p = strchr(s, ',');
970         if (p)
971                 *p = '\0';
972
973         if (*s) {
974                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
975                 if (ret)
976                         goto out_free;
977                 opts->mmap_pages = mmap_pages;
978         }
979
980         if (!p) {
981                 ret = 0;
982                 goto out_free;
983         }
984
985         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
986         if (ret)
987                 goto out_free;
988
989         opts->auxtrace_mmap_pages = mmap_pages;
990
991 out_free:
992         free(s);
993         return ret;
994 }
995
/* Usage strings for 'perf record'; NULL-terminated for parse_options(). */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
/* Exported (non-static) alias — presumably shared with other builtins
 * the same way record_options is; see the comment on __record_options. */
const char * const *record_usage = __record_usage;
1002
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		/* UINT_MAX/ULLONG_MAX mean "user gave no value". */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,	/* default sampling frequency */
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout     = 500,	/* ms, see --proc-map-timeout */
	},
	/* Event delivery callbacks used while processing the recorded data. */
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};
1036
/* Help text for --call-graph, annotated with the default record mode. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	/* Bare -g: enable call graphs with the default (fp) mode. */
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	/* --call-graph with an explicit mode and optional dump size. */
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_END()
};

/* Exported so builtin-script can reuse the table (see XXX comment above). */
struct option *record_options = __record_options;
1151
/*
 * Entry point for 'perf record': parse options, validate the target,
 * set up the evlist and AUX area tracing, then hand off to __cmd_record()
 * (defined earlier in this file) to do the actual recording.
 *
 * Returns 0 on success or a negative error code.
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	/*
	 * Disable options whose build-time prerequisites are missing, so
	 * using them produces a clear "rebuild with ..." style message.
	 */
#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	/* Config file first, so command-line options can override it. */
	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Neither a workload to launch nor an existing target: bail out. */
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	/* Set up AUX area tracing (e.g. hardware trace) if available. */
	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = -ENOMEM;

	symbol__init(NULL);

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event. */
	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	/* Target validation problems are only warnings... */
	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	/* ...but a bad uid is fatal. */
	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out_symbol_exit;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}
1284
1285 static void snapshot_sig_handler(int sig __maybe_unused)
1286 {
1287         if (!auxtrace_snapshot_enabled)
1288                 return;
1289         auxtrace_snapshot_enabled = 0;
1290         auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
1291         auxtrace_record__snapshot_started = 1;
1292 }