Merge tag 'armsoc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
[linux-2.6-block.git] / tools / perf / builtin-record.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10
11 #include "perf.h"
12
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/drv_configs.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/cpumap.h"
31 #include "util/thread_map.h"
32 #include "util/data.h"
33 #include "util/perf_regs.h"
34 #include "util/auxtrace.h"
35 #include "util/tsc.h"
36 #include "util/parse-branch-options.h"
37 #include "util/parse-regs-options.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "asm/bug.h"
45
46 #include <errno.h>
47 #include <inttypes.h>
48 #include <poll.h>
49 #include <unistd.h>
50 #include <sched.h>
51 #include <signal.h>
52 #include <sys/mman.h>
53 #include <sys/wait.h>
54 #include <linux/time64.h>
55
/*
 * State of the "switch output" feature.  The trigger conditions are
 * evaluated by switch_output_signal(), switch_output_size() and
 * switch_output_time().
 */
struct switch_output {
	bool enabled;		/* feature requested; arms switch_output_trigger */
	bool signal;		/* signal-driven switching requested */
	unsigned long size;	/* byte threshold compared with bytes_written; 0 = off */
	unsigned long time;	/* non-zero enables time-driven switching (unit: TODO confirm) */
	const char *str;	/* presumably the raw option string - verify against the parser */
	bool set;		/* presumably "option was given" - verify against the parser */
};
64
65 struct record {
66         struct perf_tool        tool;
67         struct record_opts      opts;
68         u64                     bytes_written;
69         struct perf_data        data;
70         struct auxtrace_record  *itr;
71         struct perf_evlist      *evlist;
72         struct perf_session     *session;
73         const char              *progname;
74         int                     realtime_prio;
75         bool                    no_buildid;
76         bool                    no_buildid_set;
77         bool                    no_buildid_cache;
78         bool                    no_buildid_cache_set;
79         bool                    buildid_all;
80         bool                    timestamp_filename;
81         bool                    timestamp_boundary;
82         struct switch_output    switch_output;
83         unsigned long long      samples;
84 };
85
/*
 * NOTE(review): the writers of this flag are outside this view; it is
 * presumably set from the SIGUSR2 handler, hence sig_atomic_t rather
 * than plain int.
 */
static volatile sig_atomic_t auxtrace_record__snapshot_started;
/* Triggers armed in __cmd_record() when the matching feature is enabled. */
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
89
90 static bool switch_output_signal(struct record *rec)
91 {
92         return rec->switch_output.signal &&
93                trigger_is_ready(&switch_output_trigger);
94 }
95
96 static bool switch_output_size(struct record *rec)
97 {
98         return rec->switch_output.size &&
99                trigger_is_ready(&switch_output_trigger) &&
100                (rec->bytes_written >= rec->switch_output.size);
101 }
102
103 static bool switch_output_time(struct record *rec)
104 {
105         return rec->switch_output.time &&
106                trigger_is_ready(&switch_output_trigger);
107 }
108
109 static int record__write(struct record *rec, void *bf, size_t size)
110 {
111         if (perf_data__write(rec->session->data, bf, size) < 0) {
112                 pr_err("failed to write perf data, error: %m\n");
113                 return -1;
114         }
115
116         rec->bytes_written += size;
117
118         if (switch_output_size(rec))
119                 trigger_hit(&switch_output_trigger);
120
121         return 0;
122 }
123
124 static int process_synthesized_event(struct perf_tool *tool,
125                                      union perf_event *event,
126                                      struct perf_sample *sample __maybe_unused,
127                                      struct machine *machine __maybe_unused)
128 {
129         struct record *rec = container_of(tool, struct record, tool);
130         return record__write(rec, event, event->header.size);
131 }
132
133 static int record__pushfn(void *to, void *bf, size_t size)
134 {
135         struct record *rec = to;
136
137         rec->samples++;
138         return record__write(rec, bf, size);
139 }
140
/*
 * Flags written from signal handlers (sig_handler() and
 * workload_exec_failed_signal()).  sig_atomic_t is the type the C
 * standard guarantees can be accessed atomically in that context.
 */
static volatile sig_atomic_t done;		/* stop recording */
static volatile sig_atomic_t signr = -1;	/* terminating signal, -1 if none */
static volatile sig_atomic_t child_finished;	/* SIGCHLD seen or exec failed */
144
145 static void sig_handler(int sig)
146 {
147         if (sig == SIGCHLD)
148                 child_finished = 1;
149         else
150                 signr = sig;
151
152         done = 1;
153 }
154
/*
 * SIGSEGV handler: let the perf hooks recover first, then dump the
 * stack for the faulting signal.
 */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();

	sighandler_dump_stack(sig);
}
160
161 static void record__sig_exit(void)
162 {
163         if (signr == -1)
164                 return;
165
166         signal(signr, SIG_DFL);
167         raise(signr);
168 }
169
170 #ifdef HAVE_AUXTRACE_SUPPORT
171
172 static int record__process_auxtrace(struct perf_tool *tool,
173                                     union perf_event *event, void *data1,
174                                     size_t len1, void *data2, size_t len2)
175 {
176         struct record *rec = container_of(tool, struct record, tool);
177         struct perf_data *data = &rec->data;
178         size_t padding;
179         u8 pad[8] = {0};
180
181         if (!perf_data__is_pipe(data)) {
182                 off_t file_offset;
183                 int fd = perf_data__fd(data);
184                 int err;
185
186                 file_offset = lseek(fd, 0, SEEK_CUR);
187                 if (file_offset == -1)
188                         return -1;
189                 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
190                                                      event, file_offset);
191                 if (err)
192                         return err;
193         }
194
195         /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
196         padding = (len1 + len2) & 7;
197         if (padding)
198                 padding = 8 - padding;
199
200         record__write(rec, event, event->header.size);
201         record__write(rec, data1, len1);
202         if (len2)
203                 record__write(rec, data2, len2);
204         record__write(rec, &pad, padding);
205
206         return 0;
207 }
208
209 static int record__auxtrace_mmap_read(struct record *rec,
210                                       struct auxtrace_mmap *mm)
211 {
212         int ret;
213
214         ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
215                                   record__process_auxtrace);
216         if (ret < 0)
217                 return ret;
218
219         if (ret)
220                 rec->samples++;
221
222         return 0;
223 }
224
225 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
226                                                struct auxtrace_mmap *mm)
227 {
228         int ret;
229
230         ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
231                                            record__process_auxtrace,
232                                            rec->opts.auxtrace_snapshot_size);
233         if (ret < 0)
234                 return ret;
235
236         if (ret)
237                 rec->samples++;
238
239         return 0;
240 }
241
242 static int record__auxtrace_read_snapshot_all(struct record *rec)
243 {
244         int i;
245         int rc = 0;
246
247         for (i = 0; i < rec->evlist->nr_mmaps; i++) {
248                 struct auxtrace_mmap *mm =
249                                 &rec->evlist->mmap[i].auxtrace_mmap;
250
251                 if (!mm->base)
252                         continue;
253
254                 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
255                         rc = -1;
256                         goto out;
257                 }
258         }
259 out:
260         return rc;
261 }
262
263 static void record__read_auxtrace_snapshot(struct record *rec)
264 {
265         pr_debug("Recording AUX area tracing snapshot\n");
266         if (record__auxtrace_read_snapshot_all(rec) < 0) {
267                 trigger_error(&auxtrace_snapshot_trigger);
268         } else {
269                 if (auxtrace_record__snapshot_finish(rec->itr))
270                         trigger_error(&auxtrace_snapshot_trigger);
271                 else
272                         trigger_ready(&auxtrace_snapshot_trigger);
273         }
274 }
275
276 #else
277
278 static inline
279 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
280                                struct auxtrace_mmap *mm __maybe_unused)
281 {
282         return 0;
283 }
284
285 static inline
286 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
287 {
288 }
289
290 static inline
291 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
292 {
293         return 0;
294 }
295
296 #endif
297
298 static int record__mmap_evlist(struct record *rec,
299                                struct perf_evlist *evlist)
300 {
301         struct record_opts *opts = &rec->opts;
302         char msg[512];
303
304         if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
305                                  opts->auxtrace_mmap_pages,
306                                  opts->auxtrace_snapshot_mode) < 0) {
307                 if (errno == EPERM) {
308                         pr_err("Permission error mapping pages.\n"
309                                "Consider increasing "
310                                "/proc/sys/kernel/perf_event_mlock_kb,\n"
311                                "or try again with a smaller value of -m/--mmap_pages.\n"
312                                "(current value: %u,%u)\n",
313                                opts->mmap_pages, opts->auxtrace_mmap_pages);
314                         return -errno;
315                 } else {
316                         pr_err("failed to mmap with %d (%s)\n", errno,
317                                 str_error_r(errno, msg, sizeof(msg)));
318                         if (errno)
319                                 return -errno;
320                         else
321                                 return -EINVAL;
322                 }
323         }
324         return 0;
325 }
326
327 static int record__mmap(struct record *rec)
328 {
329         return record__mmap_evlist(rec, rec->evlist);
330 }
331
332 static int record__open(struct record *rec)
333 {
334         char msg[BUFSIZ];
335         struct perf_evsel *pos;
336         struct perf_evlist *evlist = rec->evlist;
337         struct perf_session *session = rec->session;
338         struct record_opts *opts = &rec->opts;
339         struct perf_evsel_config_term *err_term;
340         int rc = 0;
341
342         /*
343          * For initial_delay we need to add a dummy event so that we can track
344          * PERF_RECORD_MMAP while we wait for the initial delay to enable the
345          * real events, the ones asked by the user.
346          */
347         if (opts->initial_delay) {
348                 if (perf_evlist__add_dummy(evlist))
349                         return -ENOMEM;
350
351                 pos = perf_evlist__first(evlist);
352                 pos->tracking = 0;
353                 pos = perf_evlist__last(evlist);
354                 pos->tracking = 1;
355                 pos->attr.enable_on_exec = 1;
356         }
357
358         perf_evlist__config(evlist, opts, &callchain_param);
359
360         evlist__for_each_entry(evlist, pos) {
361 try_again:
362                 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
363                         if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
364                                 if (verbose > 0)
365                                         ui__warning("%s\n", msg);
366                                 goto try_again;
367                         }
368
369                         rc = -errno;
370                         perf_evsel__open_strerror(pos, &opts->target,
371                                                   errno, msg, sizeof(msg));
372                         ui__error("%s\n", msg);
373                         goto out;
374                 }
375
376                 pos->supported = true;
377         }
378
379         if (perf_evlist__apply_filters(evlist, &pos)) {
380                 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
381                         pos->filter, perf_evsel__name(pos), errno,
382                         str_error_r(errno, msg, sizeof(msg)));
383                 rc = -1;
384                 goto out;
385         }
386
387         if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
388                 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
389                       err_term->val.drv_cfg, perf_evsel__name(pos), errno,
390                       str_error_r(errno, msg, sizeof(msg)));
391                 rc = -1;
392                 goto out;
393         }
394
395         rc = record__mmap(rec);
396         if (rc)
397                 goto out;
398
399         session->evlist = evlist;
400         perf_session__set_id_hdr_size(session);
401 out:
402         return rc;
403 }
404
405 static int process_sample_event(struct perf_tool *tool,
406                                 union perf_event *event,
407                                 struct perf_sample *sample,
408                                 struct perf_evsel *evsel,
409                                 struct machine *machine)
410 {
411         struct record *rec = container_of(tool, struct record, tool);
412
413         if (rec->evlist->first_sample_time == 0)
414                 rec->evlist->first_sample_time = sample->time;
415
416         rec->evlist->last_sample_time = sample->time;
417
418         if (rec->buildid_all)
419                 return 0;
420
421         rec->samples++;
422         return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
423 }
424
425 static int process_buildids(struct record *rec)
426 {
427         struct perf_data *data = &rec->data;
428         struct perf_session *session = rec->session;
429
430         if (data->size == 0)
431                 return 0;
432
433         /*
434          * During this process, it'll load kernel map and replace the
435          * dso->long_name to a real pathname it found.  In this case
436          * we prefer the vmlinux path like
437          *   /lib/modules/3.16.4/build/vmlinux
438          *
439          * rather than build-id path (in debug directory).
440          *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
441          */
442         symbol_conf.ignore_vmlinux_buildid = true;
443
444         /*
445          * If --buildid-all is given, it marks all DSO regardless of hits,
446          * so no need to process samples. But if timestamp_boundary is enabled,
447          * it still needs to walk on all samples to get the timestamps of
448          * first/last samples.
449          */
450         if (rec->buildid_all && !rec->timestamp_boundary)
451                 rec->tool.sample = NULL;
452
453         return perf_session__process_events(session);
454 }
455
456 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
457 {
458         int err;
459         struct perf_tool *tool = data;
460         /*
461          *As for guest kernel when processing subcommand record&report,
462          *we arrange module mmap prior to guest kernel mmap and trigger
463          *a preload dso because default guest module symbols are loaded
464          *from guest kallsyms instead of /lib/modules/XXX/XXX. This
465          *method is used to avoid symbol missing when the first addr is
466          *in module instead of in guest kernel.
467          */
468         err = perf_event__synthesize_modules(tool, process_synthesized_event,
469                                              machine);
470         if (err < 0)
471                 pr_err("Couldn't record guest kernel [%d]'s reference"
472                        " relocation symbol.\n", machine->pid);
473
474         /*
475          * We use _stext for guest kernel because guest kernel's /proc/kallsyms
476          * have no _text sometimes.
477          */
478         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
479                                                  machine);
480         if (err < 0)
481                 pr_err("Couldn't record guest kernel [%d]'s reference"
482                        " relocation symbol.\n", machine->pid);
483 }
484
485 static struct perf_event_header finished_round_event = {
486         .size = sizeof(struct perf_event_header),
487         .type = PERF_RECORD_FINISHED_ROUND,
488 };
489
490 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
491                                     bool overwrite)
492 {
493         u64 bytes_written = rec->bytes_written;
494         int i;
495         int rc = 0;
496         struct perf_mmap *maps;
497
498         if (!evlist)
499                 return 0;
500
501         maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
502         if (!maps)
503                 return 0;
504
505         if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
506                 return 0;
507
508         for (i = 0; i < evlist->nr_mmaps; i++) {
509                 struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
510
511                 if (maps[i].base) {
512                         if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
513                                 rc = -1;
514                                 goto out;
515                         }
516                 }
517
518                 if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
519                     record__auxtrace_mmap_read(rec, mm) != 0) {
520                         rc = -1;
521                         goto out;
522                 }
523         }
524
525         /*
526          * Mark the round finished in case we wrote
527          * at least one event.
528          */
529         if (bytes_written != rec->bytes_written)
530                 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
531
532         if (overwrite)
533                 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
534 out:
535         return rc;
536 }
537
538 static int record__mmap_read_all(struct record *rec)
539 {
540         int err;
541
542         err = record__mmap_read_evlist(rec, rec->evlist, false);
543         if (err)
544                 return err;
545
546         return record__mmap_read_evlist(rec, rec->evlist, true);
547 }
548
549 static void record__init_features(struct record *rec)
550 {
551         struct perf_session *session = rec->session;
552         int feat;
553
554         for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
555                 perf_header__set_feat(&session->header, feat);
556
557         if (rec->no_buildid)
558                 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
559
560         if (!have_tracepoints(&rec->evlist->entries))
561                 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
562
563         if (!rec->opts.branch_stack)
564                 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
565
566         if (!rec->opts.full_auxtrace)
567                 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
568
569         perf_header__clear_feat(&session->header, HEADER_STAT);
570 }
571
572 static void
573 record__finish_output(struct record *rec)
574 {
575         struct perf_data *data = &rec->data;
576         int fd = perf_data__fd(data);
577
578         if (data->is_pipe)
579                 return;
580
581         rec->session->header.data_size += rec->bytes_written;
582         data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);
583
584         if (!rec->no_buildid) {
585                 process_buildids(rec);
586
587                 if (rec->buildid_all)
588                         dsos__hit_all(rec->session);
589         }
590         perf_session__write_header(rec->session, rec->evlist, fd, true);
591
592         return;
593 }
594
595 static int record__synthesize_workload(struct record *rec, bool tail)
596 {
597         int err;
598         struct thread_map *thread_map;
599
600         if (rec->opts.tail_synthesize != tail)
601                 return 0;
602
603         thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
604         if (thread_map == NULL)
605                 return -1;
606
607         err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
608                                                  process_synthesized_event,
609                                                  &rec->session->machines.host,
610                                                  rec->opts.sample_address,
611                                                  rec->opts.proc_map_timeout);
612         thread_map__put(thread_map);
613         return err;
614 }
615
616 static int record__synthesize(struct record *rec, bool tail);
617
618 static int
619 record__switch_output(struct record *rec, bool at_exit)
620 {
621         struct perf_data *data = &rec->data;
622         int fd, err;
623
624         /* Same Size:      "2015122520103046"*/
625         char timestamp[] = "InvalidTimestamp";
626
627         record__synthesize(rec, true);
628         if (target__none(&rec->opts.target))
629                 record__synthesize_workload(rec, true);
630
631         rec->samples = 0;
632         record__finish_output(rec);
633         err = fetch_current_timestamp(timestamp, sizeof(timestamp));
634         if (err) {
635                 pr_err("Failed to get current timestamp\n");
636                 return -EINVAL;
637         }
638
639         fd = perf_data__switch(data, timestamp,
640                                     rec->session->header.data_offset,
641                                     at_exit);
642         if (fd >= 0 && !at_exit) {
643                 rec->bytes_written = 0;
644                 rec->session->header.data_size = 0;
645         }
646
647         if (!quiet)
648                 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
649                         data->file.path, timestamp);
650
651         /* Output tracking events */
652         if (!at_exit) {
653                 record__synthesize(rec, false);
654
655                 /*
656                  * In 'perf record --switch-output' without -a,
657                  * record__synthesize() in record__switch_output() won't
658                  * generate tracking events because there's no thread_map
659                  * in evlist. Which causes newly created perf.data doesn't
660                  * contain map and comm information.
661                  * Create a fake thread_map and directly call
662                  * perf_event__synthesize_thread_map() for those events.
663                  */
664                 if (target__none(&rec->opts.target))
665                         record__synthesize_workload(rec, false);
666         }
667         return fd;
668 }
669
670 static volatile int workload_exec_errno;
671
672 /*
673  * perf_evlist__prepare_workload will send a SIGUSR1
674  * if the fork fails, since we asked by setting its
675  * want_signal to true.
676  */
677 static void workload_exec_failed_signal(int signo __maybe_unused,
678                                         siginfo_t *info,
679                                         void *ucontext __maybe_unused)
680 {
681         workload_exec_errno = info->si_value.sival_int;
682         done = 1;
683         child_finished = 1;
684 }
685
686 static void snapshot_sig_handler(int sig);
687 static void alarm_sig_handler(int sig);
688
689 int __weak
690 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
691                             struct perf_tool *tool __maybe_unused,
692                             perf_event__handler_t process __maybe_unused,
693                             struct machine *machine __maybe_unused)
694 {
695         return 0;
696 }
697
698 static const struct perf_event_mmap_page *
699 perf_evlist__pick_pc(struct perf_evlist *evlist)
700 {
701         if (evlist) {
702                 if (evlist->mmap && evlist->mmap[0].base)
703                         return evlist->mmap[0].base;
704                 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
705                         return evlist->overwrite_mmap[0].base;
706         }
707         return NULL;
708 }
709
710 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
711 {
712         const struct perf_event_mmap_page *pc;
713
714         pc = perf_evlist__pick_pc(rec->evlist);
715         if (pc)
716                 return pc;
717         return NULL;
718 }
719
720 static int record__synthesize(struct record *rec, bool tail)
721 {
722         struct perf_session *session = rec->session;
723         struct machine *machine = &session->machines.host;
724         struct perf_data *data = &rec->data;
725         struct record_opts *opts = &rec->opts;
726         struct perf_tool *tool = &rec->tool;
727         int fd = perf_data__fd(data);
728         int err = 0;
729
730         if (rec->opts.tail_synthesize != tail)
731                 return 0;
732
733         if (data->is_pipe) {
734                 err = perf_event__synthesize_features(
735                         tool, session, rec->evlist, process_synthesized_event);
736                 if (err < 0) {
737                         pr_err("Couldn't synthesize features.\n");
738                         return err;
739                 }
740
741                 err = perf_event__synthesize_attrs(tool, session,
742                                                    process_synthesized_event);
743                 if (err < 0) {
744                         pr_err("Couldn't synthesize attrs.\n");
745                         goto out;
746                 }
747
748                 if (have_tracepoints(&rec->evlist->entries)) {
749                         /*
750                          * FIXME err <= 0 here actually means that
751                          * there were no tracepoints so its not really
752                          * an error, just that we don't need to
753                          * synthesize anything.  We really have to
754                          * return this more properly and also
755                          * propagate errors that now are calling die()
756                          */
757                         err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
758                                                                   process_synthesized_event);
759                         if (err <= 0) {
760                                 pr_err("Couldn't record tracing data.\n");
761                                 goto out;
762                         }
763                         rec->bytes_written += err;
764                 }
765         }
766
767         err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
768                                           process_synthesized_event, machine);
769         if (err)
770                 goto out;
771
772         if (rec->opts.full_auxtrace) {
773                 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
774                                         session, process_synthesized_event);
775                 if (err)
776                         goto out;
777         }
778
779         if (!perf_evlist__exclude_kernel(rec->evlist)) {
780                 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
781                                                          machine);
782                 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
783                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
784                                    "Check /proc/kallsyms permission or run as root.\n");
785
786                 err = perf_event__synthesize_modules(tool, process_synthesized_event,
787                                                      machine);
788                 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
789                                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
790                                    "Check /proc/modules permission or run as root.\n");
791         }
792
793         if (perf_guest) {
794                 machines__process_guests(&session->machines,
795                                          perf_event__synthesize_guest_os, tool);
796         }
797
798         err = perf_event__synthesize_extra_attr(&rec->tool,
799                                                 rec->evlist,
800                                                 process_synthesized_event,
801                                                 data->is_pipe);
802         if (err)
803                 goto out;
804
805         err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
806                                                  process_synthesized_event,
807                                                 NULL);
808         if (err < 0) {
809                 pr_err("Couldn't synthesize thread map.\n");
810                 return err;
811         }
812
813         err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
814                                              process_synthesized_event, NULL);
815         if (err < 0) {
816                 pr_err("Couldn't synthesize cpu map.\n");
817                 return err;
818         }
819
820         err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
821                                             process_synthesized_event, opts->sample_address,
822                                             opts->proc_map_timeout, 1);
823 out:
824         return err;
825 }
826
827 static int __cmd_record(struct record *rec, int argc, const char **argv)
828 {
829         int err;
830         int status = 0;
831         unsigned long waking = 0;
832         const bool forks = argc > 0;
833         struct machine *machine;
834         struct perf_tool *tool = &rec->tool;
835         struct record_opts *opts = &rec->opts;
836         struct perf_data *data = &rec->data;
837         struct perf_session *session;
838         bool disabled = false, draining = false;
839         int fd;
840
841         rec->progname = argv[0];
842
843         atexit(record__sig_exit);
844         signal(SIGCHLD, sig_handler);
845         signal(SIGINT, sig_handler);
846         signal(SIGTERM, sig_handler);
847         signal(SIGSEGV, sigsegv_handler);
848
849         if (rec->opts.record_namespaces)
850                 tool->namespace_events = true;
851
852         if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
853                 signal(SIGUSR2, snapshot_sig_handler);
854                 if (rec->opts.auxtrace_snapshot_mode)
855                         trigger_on(&auxtrace_snapshot_trigger);
856                 if (rec->switch_output.enabled)
857                         trigger_on(&switch_output_trigger);
858         } else {
859                 signal(SIGUSR2, SIG_IGN);
860         }
861
862         session = perf_session__new(data, false, tool);
863         if (session == NULL) {
864                 pr_err("Perf session creation failed.\n");
865                 return -1;
866         }
867
868         fd = perf_data__fd(data);
869         rec->session = session;
870
871         record__init_features(rec);
872
873         if (forks) {
874                 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
875                                                     argv, data->is_pipe,
876                                                     workload_exec_failed_signal);
877                 if (err < 0) {
878                         pr_err("Couldn't run the workload!\n");
879                         status = err;
880                         goto out_delete_session;
881                 }
882         }
883
884         if (record__open(rec) != 0) {
885                 err = -1;
886                 goto out_child;
887         }
888
889         err = bpf__apply_obj_config();
890         if (err) {
891                 char errbuf[BUFSIZ];
892
893                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
894                 pr_err("ERROR: Apply config to BPF failed: %s\n",
895                          errbuf);
896                 goto out_child;
897         }
898
899         /*
900          * Normally perf_session__new would do this, but it doesn't have the
901          * evlist.
902          */
903         if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
904                 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
905                 rec->tool.ordered_events = false;
906         }
907
908         if (!rec->evlist->nr_groups)
909                 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
910
911         if (data->is_pipe) {
912                 err = perf_header__write_pipe(fd);
913                 if (err < 0)
914                         goto out_child;
915         } else {
916                 err = perf_session__write_header(session, rec->evlist, fd, false);
917                 if (err < 0)
918                         goto out_child;
919         }
920
921         if (!rec->no_buildid
922             && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
923                 pr_err("Couldn't generate buildids. "
924                        "Use --no-buildid to profile anyway.\n");
925                 err = -1;
926                 goto out_child;
927         }
928
929         machine = &session->machines.host;
930
931         err = record__synthesize(rec, false);
932         if (err < 0)
933                 goto out_child;
934
935         if (rec->realtime_prio) {
936                 struct sched_param param;
937
938                 param.sched_priority = rec->realtime_prio;
939                 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
940                         pr_err("Could not set realtime priority.\n");
941                         err = -1;
942                         goto out_child;
943                 }
944         }
945
946         /*
947          * When perf is starting the traced process, all the events
948          * (apart from group members) have enable_on_exec=1 set,
949          * so don't spoil it by prematurely enabling them.
950          */
951         if (!target__none(&opts->target) && !opts->initial_delay)
952                 perf_evlist__enable(rec->evlist);
953
954         /*
955          * Let the child rip
956          */
957         if (forks) {
958                 union perf_event *event;
959                 pid_t tgid;
960
961                 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
962                 if (event == NULL) {
963                         err = -ENOMEM;
964                         goto out_child;
965                 }
966
967                 /*
968                  * Some H/W events are generated before COMM event
969                  * which is emitted during exec(), so perf script
970                  * cannot see a correct process name for those events.
971                  * Synthesize COMM event to prevent it.
972                  */
973                 tgid = perf_event__synthesize_comm(tool, event,
974                                                    rec->evlist->workload.pid,
975                                                    process_synthesized_event,
976                                                    machine);
977                 free(event);
978
979                 if (tgid == -1)
980                         goto out_child;
981
982                 event = malloc(sizeof(event->namespaces) +
983                                (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
984                                machine->id_hdr_size);
985                 if (event == NULL) {
986                         err = -ENOMEM;
987                         goto out_child;
988                 }
989
990                 /*
991                  * Synthesize NAMESPACES event for the command specified.
992                  */
993                 perf_event__synthesize_namespaces(tool, event,
994                                                   rec->evlist->workload.pid,
995                                                   tgid, process_synthesized_event,
996                                                   machine);
997                 free(event);
998
999                 perf_evlist__start_workload(rec->evlist);
1000         }
1001
1002         if (opts->initial_delay) {
1003                 usleep(opts->initial_delay * USEC_PER_MSEC);
1004                 perf_evlist__enable(rec->evlist);
1005         }
1006
1007         trigger_ready(&auxtrace_snapshot_trigger);
1008         trigger_ready(&switch_output_trigger);
1009         perf_hooks__invoke_record_start();
1010         for (;;) {
1011                 unsigned long long hits = rec->samples;
1012
1013                 /*
1014                  * rec->evlist->bkw_mmap_state is possible to be
1015                  * BKW_MMAP_EMPTY here: when done == true and
1016                  * hits != rec->samples in previous round.
1017                  *
1018                  * perf_evlist__toggle_bkw_mmap ensure we never
1019                  * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1020                  */
1021                 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1022                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1023
1024                 if (record__mmap_read_all(rec) < 0) {
1025                         trigger_error(&auxtrace_snapshot_trigger);
1026                         trigger_error(&switch_output_trigger);
1027                         err = -1;
1028                         goto out_child;
1029                 }
1030
1031                 if (auxtrace_record__snapshot_started) {
1032                         auxtrace_record__snapshot_started = 0;
1033                         if (!trigger_is_error(&auxtrace_snapshot_trigger))
1034                                 record__read_auxtrace_snapshot(rec);
1035                         if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1036                                 pr_err("AUX area tracing snapshot failed\n");
1037                                 err = -1;
1038                                 goto out_child;
1039                         }
1040                 }
1041
1042                 if (trigger_is_hit(&switch_output_trigger)) {
1043                         /*
1044                          * If switch_output_trigger is hit, the data in
1045                          * overwritable ring buffer should have been collected,
1046                          * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1047                          *
1048                          * If SIGUSR2 raise after or during record__mmap_read_all(),
1049                          * record__mmap_read_all() didn't collect data from
1050                          * overwritable ring buffer. Read again.
1051                          */
1052                         if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1053                                 continue;
1054                         trigger_ready(&switch_output_trigger);
1055
1056                         /*
1057                          * Reenable events in overwrite ring buffer after
1058                          * record__mmap_read_all(): we should have collected
1059                          * data from it.
1060                          */
1061                         perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1062
1063                         if (!quiet)
1064                                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1065                                         waking);
1066                         waking = 0;
1067                         fd = record__switch_output(rec, false);
1068                         if (fd < 0) {
1069                                 pr_err("Failed to switch to new file\n");
1070                                 trigger_error(&switch_output_trigger);
1071                                 err = fd;
1072                                 goto out_child;
1073                         }
1074
1075                         /* re-arm the alarm */
1076                         if (rec->switch_output.time)
1077                                 alarm(rec->switch_output.time);
1078                 }
1079
1080                 if (hits == rec->samples) {
1081                         if (done || draining)
1082                                 break;
1083                         err = perf_evlist__poll(rec->evlist, -1);
1084                         /*
1085                          * Propagate error, only if there's any. Ignore positive
1086                          * number of returned events and interrupt error.
1087                          */
1088                         if (err > 0 || (err < 0 && errno == EINTR))
1089                                 err = 0;
1090                         waking++;
1091
1092                         if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1093                                 draining = true;
1094                 }
1095
1096                 /*
1097                  * When perf is starting the traced process, at the end events
1098                  * die with the process and we wait for that. Thus no need to
1099                  * disable events in this case.
1100                  */
1101                 if (done && !disabled && !target__none(&opts->target)) {
1102                         trigger_off(&auxtrace_snapshot_trigger);
1103                         perf_evlist__disable(rec->evlist);
1104                         disabled = true;
1105                 }
1106         }
1107         trigger_off(&auxtrace_snapshot_trigger);
1108         trigger_off(&switch_output_trigger);
1109
1110         if (forks && workload_exec_errno) {
1111                 char msg[STRERR_BUFSIZE];
1112                 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1113                 pr_err("Workload failed: %s\n", emsg);
1114                 err = -1;
1115                 goto out_child;
1116         }
1117
1118         if (!quiet)
1119                 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1120
1121         if (target__none(&rec->opts.target))
1122                 record__synthesize_workload(rec, true);
1123
1124 out_child:
1125         if (forks) {
1126                 int exit_status;
1127
1128                 if (!child_finished)
1129                         kill(rec->evlist->workload.pid, SIGTERM);
1130
1131                 wait(&exit_status);
1132
1133                 if (err < 0)
1134                         status = err;
1135                 else if (WIFEXITED(exit_status))
1136                         status = WEXITSTATUS(exit_status);
1137                 else if (WIFSIGNALED(exit_status))
1138                         signr = WTERMSIG(exit_status);
1139         } else
1140                 status = err;
1141
1142         record__synthesize(rec, true);
1143         /* this will be recalculated during process_buildids() */
1144         rec->samples = 0;
1145
1146         if (!err) {
1147                 if (!rec->timestamp_filename) {
1148                         record__finish_output(rec);
1149                 } else {
1150                         fd = record__switch_output(rec, true);
1151                         if (fd < 0) {
1152                                 status = fd;
1153                                 goto out_delete_session;
1154                         }
1155                 }
1156         }
1157
1158         perf_hooks__invoke_record_end();
1159
1160         if (!err && !quiet) {
1161                 char samples[128];
1162                 const char *postfix = rec->timestamp_filename ?
1163                                         ".<timestamp>" : "";
1164
1165                 if (rec->samples && !rec->opts.full_auxtrace)
1166                         scnprintf(samples, sizeof(samples),
1167                                   " (%" PRIu64 " samples)", rec->samples);
1168                 else
1169                         samples[0] = '\0';
1170
1171                 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1172                         perf_data__size(data) / 1024.0 / 1024.0,
1173                         data->file.path, postfix, samples);
1174         }
1175
1176 out_delete_session:
1177         perf_session__delete(session);
1178         return status;
1179 }
1180
1181 static void callchain_debug(struct callchain_param *callchain)
1182 {
1183         static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1184
1185         pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1186
1187         if (callchain->record_mode == CALLCHAIN_DWARF)
1188                 pr_debug("callchain: stack dump size %d\n",
1189                          callchain->dump_size);
1190 }
1191
1192 int record_opts__parse_callchain(struct record_opts *record,
1193                                  struct callchain_param *callchain,
1194                                  const char *arg, bool unset)
1195 {
1196         int ret;
1197         callchain->enabled = !unset;
1198
1199         /* --no-call-graph */
1200         if (unset) {
1201                 callchain->record_mode = CALLCHAIN_NONE;
1202                 pr_debug("callchain: disabled\n");
1203                 return 0;
1204         }
1205
1206         ret = parse_callchain_record_opt(arg, callchain);
1207         if (!ret) {
1208                 /* Enable data address sampling for DWARF unwind. */
1209                 if (callchain->record_mode == CALLCHAIN_DWARF)
1210                         record->sample_address = true;
1211                 callchain_debug(callchain);
1212         }
1213
1214         return ret;
1215 }
1216
1217 int record_parse_callchain_opt(const struct option *opt,
1218                                const char *arg,
1219                                int unset)
1220 {
1221         return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1222 }
1223
1224 int record_callchain_opt(const struct option *opt,
1225                          const char *arg __maybe_unused,
1226                          int unset __maybe_unused)
1227 {
1228         struct callchain_param *callchain = opt->value;
1229
1230         callchain->enabled = true;
1231
1232         if (callchain->record_mode == CALLCHAIN_NONE)
1233                 callchain->record_mode = CALLCHAIN_FP;
1234
1235         callchain_debug(callchain);
1236         return 0;
1237 }
1238
1239 static int perf_record_config(const char *var, const char *value, void *cb)
1240 {
1241         struct record *rec = cb;
1242
1243         if (!strcmp(var, "record.build-id")) {
1244                 if (!strcmp(value, "cache"))
1245                         rec->no_buildid_cache = false;
1246                 else if (!strcmp(value, "no-cache"))
1247                         rec->no_buildid_cache = true;
1248                 else if (!strcmp(value, "skip"))
1249                         rec->no_buildid = true;
1250                 else
1251                         return -1;
1252                 return 0;
1253         }
1254         if (!strcmp(var, "record.call-graph"))
1255                 var = "call-graph.record-mode"; /* fall-through */
1256
1257         return perf_default_config(var, value, cb);
1258 }
1259
/* One entry of the clock name table: a name and the clockid it stands for. */
struct clockid_map {
        const char *name;
        int clockid;
};

/* Table entry initializers; CLOCKID_END is the NULL-named terminator. */
#define CLOCKID_MAP(n, c)       { .name = n, .clockid = (c), }
#define CLOCKID_END             { .name = NULL, }

/*
 * Fallbacks for clock ids the system headers may not define: we need to
 * build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif

#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif

#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

/* Clock names accepted on the command line, terminated by CLOCKID_END. */
static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic",     CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime",      CLOCK_REALTIME),
        CLOCKID_MAP("boottime",      CLOCK_BOOTTIME),
        CLOCKID_MAP("tai",           CLOCK_TAI),

        /* short aliases, available for the lazy */
        CLOCKID_MAP("mono",          CLOCK_MONOTONIC),
        CLOCKID_MAP("raw",           CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real",          CLOCK_REALTIME),
        CLOCKID_MAP("boot",          CLOCK_BOOTTIME),

        CLOCKID_END,
};
1302
1303 static int parse_clockid(const struct option *opt, const char *str, int unset)
1304 {
1305         struct record_opts *opts = (struct record_opts *)opt->value;
1306         const struct clockid_map *cm;
1307         const char *ostr = str;
1308
1309         if (unset) {
1310                 opts->use_clockid = 0;
1311                 return 0;
1312         }
1313
1314         /* no arg passed */
1315         if (!str)
1316                 return 0;
1317
1318         /* no setting it twice */
1319         if (opts->use_clockid)
1320                 return -1;
1321
1322         opts->use_clockid = true;
1323
1324         /* if its a number, we're done */
1325         if (sscanf(str, "%d", &opts->clockid) == 1)
1326                 return 0;
1327
1328         /* allow a "CLOCK_" prefix to the name */
1329         if (!strncasecmp(str, "CLOCK_", 6))
1330                 str += 6;
1331
1332         for (cm = clockids; cm->name; cm++) {
1333                 if (!strcasecmp(str, cm->name)) {
1334                         opts->clockid = cm->clockid;
1335                         return 0;
1336                 }
1337         }
1338
1339         opts->use_clockid = false;
1340         ui__warning("unknown clockid %s, check man page\n", ostr);
1341         return -1;
1342 }
1343
1344 static int record__parse_mmap_pages(const struct option *opt,
1345                                     const char *str,
1346                                     int unset __maybe_unused)
1347 {
1348         struct record_opts *opts = opt->value;
1349         char *s, *p;
1350         unsigned int mmap_pages;
1351         int ret;
1352
1353         if (!str)
1354                 return -EINVAL;
1355
1356         s = strdup(str);
1357         if (!s)
1358                 return -ENOMEM;
1359
1360         p = strchr(s, ',');
1361         if (p)
1362                 *p = '\0';
1363
1364         if (*s) {
1365                 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1366                 if (ret)
1367                         goto out_free;
1368                 opts->mmap_pages = mmap_pages;
1369         }
1370
1371         if (!p) {
1372                 ret = 0;
1373                 goto out_free;
1374         }
1375
1376         ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1377         if (ret)
1378                 goto out_free;
1379
1380         opts->auxtrace_mmap_pages = mmap_pages;
1381
1382 out_free:
1383         free(s);
1384         return ret;
1385 }
1386
1387 static void switch_output_size_warn(struct record *rec)
1388 {
1389         u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1390         struct switch_output *s = &rec->switch_output;
1391
1392         wakeup_size /= 2;
1393
1394         if (s->size < wakeup_size) {
1395                 char buf[100];
1396
1397                 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1398                 pr_warning("WARNING: switch-output data size lower than "
1399                            "wakeup kernel buffer size (%s) "
1400                            "expect bigger perf.data sizes\n", buf);
1401         }
1402 }
1403
1404 static int switch_output_setup(struct record *rec)
1405 {
1406         struct switch_output *s = &rec->switch_output;
1407         static struct parse_tag tags_size[] = {
1408                 { .tag  = 'B', .mult = 1       },
1409                 { .tag  = 'K', .mult = 1 << 10 },
1410                 { .tag  = 'M', .mult = 1 << 20 },
1411                 { .tag  = 'G', .mult = 1 << 30 },
1412                 { .tag  = 0 },
1413         };
1414         static struct parse_tag tags_time[] = {
1415                 { .tag  = 's', .mult = 1        },
1416                 { .tag  = 'm', .mult = 60       },
1417                 { .tag  = 'h', .mult = 60*60    },
1418                 { .tag  = 'd', .mult = 60*60*24 },
1419                 { .tag  = 0 },
1420         };
1421         unsigned long val;
1422
1423         if (!s->set)
1424                 return 0;
1425
1426         if (!strcmp(s->str, "signal")) {
1427                 s->signal = true;
1428                 pr_debug("switch-output with SIGUSR2 signal\n");
1429                 goto enabled;
1430         }
1431
1432         val = parse_tag_value(s->str, tags_size);
1433         if (val != (unsigned long) -1) {
1434                 s->size = val;
1435                 pr_debug("switch-output with %s size threshold\n", s->str);
1436                 goto enabled;
1437         }
1438
1439         val = parse_tag_value(s->str, tags_time);
1440         if (val != (unsigned long) -1) {
1441                 s->time = val;
1442                 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1443                          s->str, s->time);
1444                 goto enabled;
1445         }
1446
1447         return -1;
1448
1449 enabled:
1450         rec->timestamp_filename = true;
1451         s->enabled              = true;
1452
1453         if (s->size && !rec->opts.no_buffering)
1454                 switch_output_size_warn(rec);
1455
1456         return 0;
1457 }
1458
/* NULL-terminated synopsis lines for 'perf record'. */
static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL,
};

/* Externally visible pointer to the synopsis table above. */
const char * const *record_usage = __record_usage;
1465
1466 /*
1467  * XXX Ideally would be local to cmd_record() and passed to a record__new
1468  * because we need to have access to it in record__exit, that is called
1469  * after cmd_record() exits, but since record_options need to be accessible to
1470  * builtin-script, leave it here.
1471  *
1472  * At least we don't ouch it in all the other functions here directly.
1473  *
1474  * Just say no to tons of global variables, sigh.
1475  */
1476 static struct record record = {
1477         .opts = {
1478                 .sample_time         = true,
1479                 .mmap_pages          = UINT_MAX,
1480                 .user_freq           = UINT_MAX,
1481                 .user_interval       = ULLONG_MAX,
1482                 .freq                = 4000,
1483                 .target              = {
1484                         .uses_mmap   = true,
1485                         .default_per_cpu = true,
1486                 },
1487                 .proc_map_timeout     = 500,
1488         },
1489         .tool = {
1490                 .sample         = process_sample_event,
1491                 .fork           = perf_event__process_fork,
1492                 .exit           = perf_event__process_exit,
1493                 .comm           = perf_event__process_comm,
1494                 .namespaces     = perf_event__process_namespaces,
1495                 .mmap           = perf_event__process_mmap,
1496                 .mmap2          = perf_event__process_mmap2,
1497                 .ordered_events = true,
1498         },
1499 };
1500
1501 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1502         "\n\t\t\t\tDefault: fp";
1503
1504 static bool dry_run;
1505
1506 /*
1507  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1508  * with it and switch to use the library functions in perf_evlist that came
1509  * from builtin-record.c, i.e. use record_opts,
1510  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1511  * using pipes, etc.
1512  */
1513 static struct option __record_options[] = {
1514         OPT_CALLBACK('e', "event", &record.evlist, "event",
1515                      "event selector. use 'perf list' to list available events",
1516                      parse_events_option),
1517         OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1518                      "event filter", parse_filter),
1519         OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1520                            NULL, "don't record events from perf itself",
1521                            exclude_perf),
1522         OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1523                     "record events on existing process id"),
1524         OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1525                     "record events on existing thread id"),
1526         OPT_INTEGER('r', "realtime", &record.realtime_prio,
1527                     "collect data with this RT SCHED_FIFO priority"),
1528         OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1529                     "collect data without buffering"),
1530         OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1531                     "collect raw sample records from all opened counters"),
1532         OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1533                             "system-wide collection from all CPUs"),
1534         OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1535                     "list of cpus to monitor"),
1536         OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1537         OPT_STRING('o', "output", &record.data.file.path, "file",
1538                     "output file name"),
1539         OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1540                         &record.opts.no_inherit_set,
1541                         "child tasks do not inherit counters"),
1542         OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1543                     "synthesize non-sample events at the end of output"),
1544         OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1545         OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1546         OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1547                      "number of mmap data pages and AUX area tracing mmap pages",
1548                      record__parse_mmap_pages),
1549         OPT_BOOLEAN(0, "group", &record.opts.group,
1550                     "put the counters into a counter group"),
1551         OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1552                            NULL, "enables call-graph recording" ,
1553                            &record_callchain_opt),
1554         OPT_CALLBACK(0, "call-graph", &record.opts,
1555                      "record_mode[,record_size]", record_callchain_help,
1556                      &record_parse_callchain_opt),
1557         OPT_INCR('v', "verbose", &verbose,
1558                     "be more verbose (show counter open errors, etc)"),
1559         OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1560         OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1561                     "per thread counts"),
1562         OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1563         OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1564                     "Record the sample physical addresses"),
1565         OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1566         OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1567                         &record.opts.sample_time_set,
1568                         "Record the sample timestamps"),
1569         OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1570                         "Record the sample period"),
1571         OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1572                     "don't sample"),
1573         OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1574                         &record.no_buildid_cache_set,
1575                         "do not update the buildid cache"),
1576         OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1577                         &record.no_buildid_set,
1578                         "do not collect buildids in perf.data"),
1579         OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1580                      "monitor event in cgroup name only",
1581                      parse_cgroups),
1582         OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1583                   "ms to wait before starting measurement after program start"),
1584         OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1585                    "user to profile"),
1586
1587         OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1588                      "branch any", "sample any taken branches",
1589                      parse_branch_stack),
1590
1591         OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1592                      "branch filter mask", "branch stack filter modes",
1593                      parse_branch_stack),
1594         OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1595                     "sample by weight (on special events only)"),
1596         OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1597                     "sample transaction flags (special events only)"),
1598         OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1599                     "use per-thread mmaps"),
1600         OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1601                     "sample selected machine registers on interrupt,"
1602                     " use -I ? to list register names", parse_regs),
1603         OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1604                     "sample selected machine registers on interrupt,"
1605                     " use -I ? to list register names", parse_regs),
1606         OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1607                     "Record running/enabled time of read (:S) events"),
1608         OPT_CALLBACK('k', "clockid", &record.opts,
1609         "clockid", "clockid to use for events, see clock_gettime()",
1610         parse_clockid),
1611         OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1612                           "opts", "AUX area tracing Snapshot Mode", ""),
1613         OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1614                         "per thread proc mmap processing timeout in ms"),
1615         OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1616                     "Record namespaces events"),
1617         OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1618                     "Record context switch events"),
1619         OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1620                          "Configure all used events to run in kernel space.",
1621                          PARSE_OPT_EXCLUSIVE),
1622         OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1623                          "Configure all used events to run in user space.",
1624                          PARSE_OPT_EXCLUSIVE),
1625         OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1626                    "clang binary to use for compiling BPF scriptlets"),
1627         OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1628                    "options passed to clang when compiling BPF scriptlets"),
1629         OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1630                    "file", "vmlinux pathname"),
1631         OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1632                     "Record build-id of all DSOs regardless of hits"),
1633         OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1634                     "append timestamp to output filename"),
1635         OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
1636                     "Record timestamp boundary (time of first/last samples)"),
1637         OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1638                           &record.switch_output.set, "signal,size,time",
1639                           "Switch output when receive SIGUSR2 or cross size,time threshold",
1640                           "signal"),
1641         OPT_BOOLEAN(0, "dry-run", &dry_run,
1642                     "Parse options then exit"),
1643         OPT_END()
1644 };
1645
1646 struct option *record_options = __record_options;
1647
1648 int cmd_record(int argc, const char **argv)
1649 {
1650         int err;
1651         struct record *rec = &record;
1652         char errbuf[BUFSIZ];
1653
1654 #ifndef HAVE_LIBBPF_SUPPORT
1655 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1656         set_nobuild('\0', "clang-path", true);
1657         set_nobuild('\0', "clang-opt", true);
1658 # undef set_nobuild
1659 #endif
1660
1661 #ifndef HAVE_BPF_PROLOGUE
1662 # if !defined (HAVE_DWARF_SUPPORT)
1663 #  define REASON  "NO_DWARF=1"
1664 # elif !defined (HAVE_LIBBPF_SUPPORT)
1665 #  define REASON  "NO_LIBBPF=1"
1666 # else
1667 #  define REASON  "this architecture doesn't support BPF prologue"
1668 # endif
1669 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1670         set_nobuild('\0', "vmlinux", true);
1671 # undef set_nobuild
1672 # undef REASON
1673 #endif
1674
1675         rec->evlist = perf_evlist__new();
1676         if (rec->evlist == NULL)
1677                 return -ENOMEM;
1678
1679         err = perf_config(perf_record_config, rec);
1680         if (err)
1681                 return err;
1682
1683         argc = parse_options(argc, argv, record_options, record_usage,
1684                             PARSE_OPT_STOP_AT_NON_OPTION);
1685         if (quiet)
1686                 perf_quiet_option();
1687
1688         /* Make system wide (-a) the default target. */
1689         if (!argc && target__none(&rec->opts.target))
1690                 rec->opts.target.system_wide = true;
1691
1692         if (nr_cgroups && !rec->opts.target.system_wide) {
1693                 usage_with_options_msg(record_usage, record_options,
1694                         "cgroup monitoring only available in system-wide mode");
1695
1696         }
1697         if (rec->opts.record_switch_events &&
1698             !perf_can_record_switch_events()) {
1699                 ui__error("kernel does not support recording context switch events\n");
1700                 parse_options_usage(record_usage, record_options, "switch-events", 0);
1701                 return -EINVAL;
1702         }
1703
1704         if (switch_output_setup(rec)) {
1705                 parse_options_usage(record_usage, record_options, "switch-output", 0);
1706                 return -EINVAL;
1707         }
1708
1709         if (rec->switch_output.time) {
1710                 signal(SIGALRM, alarm_sig_handler);
1711                 alarm(rec->switch_output.time);
1712         }
1713
1714         if (!rec->itr) {
1715                 rec->itr = auxtrace_record__init(rec->evlist, &err);
1716                 if (err)
1717                         goto out;
1718         }
1719
1720         err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1721                                               rec->opts.auxtrace_snapshot_opts);
1722         if (err)
1723                 goto out;
1724
1725         /*
1726          * Allow aliases to facilitate the lookup of symbols for address
1727          * filters. Refer to auxtrace_parse_filters().
1728          */
1729         symbol_conf.allow_aliases = true;
1730
1731         symbol__init(NULL);
1732
1733         err = auxtrace_parse_filters(rec->evlist);
1734         if (err)
1735                 goto out;
1736
1737         if (dry_run)
1738                 goto out;
1739
1740         err = bpf__setup_stdout(rec->evlist);
1741         if (err) {
1742                 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1743                 pr_err("ERROR: Setup BPF stdout failed: %s\n",
1744                          errbuf);
1745                 goto out;
1746         }
1747
1748         err = -ENOMEM;
1749
1750         if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
1751                 pr_warning(
1752 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1753 "check /proc/sys/kernel/kptr_restrict.\n\n"
1754 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1755 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1756 "Samples in kernel modules won't be resolved at all.\n\n"
1757 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1758 "even with a suitable vmlinux or kallsyms file.\n\n");
1759
1760         if (rec->no_buildid_cache || rec->no_buildid) {
1761                 disable_buildid_cache();
1762         } else if (rec->switch_output.enabled) {
1763                 /*
1764                  * In 'perf record --switch-output', disable buildid
1765                  * generation by default to reduce data file switching
1766                  * overhead. Still generate buildid if they are required
1767                  * explicitly using
1768                  *
1769                  *  perf record --switch-output --no-no-buildid \
1770                  *              --no-no-buildid-cache
1771                  *
1772                  * Following code equals to:
1773                  *
1774                  * if ((rec->no_buildid || !rec->no_buildid_set) &&
1775                  *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1776                  *         disable_buildid_cache();
1777                  */
1778                 bool disable = true;
1779
1780                 if (rec->no_buildid_set && !rec->no_buildid)
1781                         disable = false;
1782                 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1783                         disable = false;
1784                 if (disable) {
1785                         rec->no_buildid = true;
1786                         rec->no_buildid_cache = true;
1787                         disable_buildid_cache();
1788                 }
1789         }
1790
1791         if (record.opts.overwrite)
1792                 record.opts.tail_synthesize = true;
1793
1794         if (rec->evlist->nr_entries == 0 &&
1795             __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
1796                 pr_err("Not enough memory for event selector list\n");
1797                 goto out;
1798         }
1799
1800         if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1801                 rec->opts.no_inherit = true;
1802
1803         err = target__validate(&rec->opts.target);
1804         if (err) {
1805                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1806                 ui__warning("%s", errbuf);
1807         }
1808
1809         err = target__parse_uid(&rec->opts.target);
1810         if (err) {
1811                 int saved_errno = errno;
1812
1813                 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1814                 ui__error("%s", errbuf);
1815
1816                 err = -saved_errno;
1817                 goto out;
1818         }
1819
1820         /* Enable ignoring missing threads when -u/-p option is defined. */
1821         rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
1822
1823         err = -ENOMEM;
1824         if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1825                 usage_with_options(record_usage, record_options);
1826
1827         err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1828         if (err)
1829                 goto out;
1830
1831         /*
1832          * We take all buildids when the file contains
1833          * AUX area tracing data because we do not decode the
1834          * trace because it would take too long.
1835          */
1836         if (rec->opts.full_auxtrace)
1837                 rec->buildid_all = true;
1838
1839         if (record_opts__config(&rec->opts)) {
1840                 err = -EINVAL;
1841                 goto out;
1842         }
1843
1844         err = __cmd_record(&record, argc, argv);
1845 out:
1846         perf_evlist__delete(rec->evlist);
1847         symbol__exit();
1848         auxtrace_record__free(rec->itr);
1849         return err;
1850 }
1851
1852 static void snapshot_sig_handler(int sig __maybe_unused)
1853 {
1854         struct record *rec = &record;
1855
1856         if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1857                 trigger_hit(&auxtrace_snapshot_trigger);
1858                 auxtrace_record__snapshot_started = 1;
1859                 if (auxtrace_record__snapshot_start(record.itr))
1860                         trigger_error(&auxtrace_snapshot_trigger);
1861         }
1862
1863         if (switch_output_signal(rec))
1864                 trigger_hit(&switch_output_trigger);
1865 }
1866
1867 static void alarm_sig_handler(int sig __maybe_unused)
1868 {
1869         struct record *rec = &record;
1870
1871         if (switch_output_time(rec))
1872                 trigger_hit(&switch_output_trigger);
1873 }