perf callchain: Allow symbols to be optional when resolving a callchain
author Ian Rogers <irogers@google.com>
Mon, 9 Sep 2024 20:37:40 +0000 (13:37 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 10 Sep 2024 20:32:47 +0000 (17:32 -0300)
In uses like 'perf inject' it is not necessary to gather the symbol for
each call chain location; the map for the sample IP is wanted so that
build IDs and the like can be injected. Make gathering the symbol in the
callchain_cursor optional.

For a 'perf inject -B' command this lowers the peak RSS from 54.1MB to
29.6MB by avoiding loading symbols.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anne Macedo <retpolanne@posteo.net>
Cc: Casey Chen <cachen@purestorage.com>
Cc: Colin Ian King <colin.i.king@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sun Haiyong <sunhaiyong@loongson.cn>
Link: https://lore.kernel.org/r/20240909203740.143492-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-inject.c
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/machine.c
tools/perf/util/machine.h

index 9eb72ff48d88e8d6ea597b07dcc14fcea42bc341..d6989195a061ffe589c64ec49e609bb7a866fc29 100644 (file)
@@ -942,7 +942,7 @@ int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *e
        }
 
        sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH,
-                                       mark_dso_hit_callback, &args);
+                                       /*symbols=*/false, mark_dso_hit_callback, &args);
 
        thread__put(thread);
 repipe:
index 0d608e875fe96d39b1dc0602cd1a45a8a5ee3791..0c7564747a14e539f49fb47073730d763f1a826d 100644 (file)
@@ -1800,7 +1800,7 @@ s64 callchain_avg_cycles(struct callchain_node *cnode)
 
 int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
                                    struct perf_sample *sample, int max_stack,
-                                   callchain_iter_fn cb, void *data)
+                                   bool symbols, callchain_iter_fn cb, void *data)
 {
        struct callchain_cursor *cursor = get_tls_callchain_cursor();
        int ret;
@@ -1809,9 +1809,9 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
                return -ENOMEM;
 
        /* Fill in the callchain. */
-       ret = thread__resolve_callchain(thread, cursor, evsel, sample,
-                                       /*parent=*/NULL, /*root_al=*/NULL,
-                                       max_stack);
+       ret = __thread__resolve_callchain(thread, cursor, evsel, sample,
+                                         /*parent=*/NULL, /*root_al=*/NULL,
+                                         max_stack, symbols);
        if (ret)
                return ret;
 
index 76891f8e237303631f98223f4783471a40ae8179..86ed9e4d04f9ee7b019efbf7e8f22bc82db2d752 100644 (file)
@@ -315,6 +315,6 @@ typedef int (*callchain_iter_fn)(struct callchain_cursor_node *node, void *data)
 
 int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
                                    struct perf_sample *sample, int max_stack,
-                                   callchain_iter_fn cb, void *data);
+                                   bool symbols, callchain_iter_fn cb, void *data);
 
 #endif /* __PERF_CALLCHAIN_H */
index 5783b96fb988d254506429eb154f36dd950b3ee4..fad227b625d155c5c1f3d3abc37bfa4c3bb8c7d2 100644 (file)
@@ -2060,7 +2060,8 @@ static int add_callchain_ip(struct thread *thread,
                            bool branch,
                            struct branch_flags *flags,
                            struct iterations *iter,
-                           u64 branch_from)
+                           u64 branch_from,
+                           bool symbols)
 {
        struct map_symbol ms = {};
        struct addr_location al;
@@ -2099,7 +2100,8 @@ static int add_callchain_ip(struct thread *thread,
                        }
                        goto out;
                }
-               thread__find_symbol(thread, *cpumode, ip, &al);
+               if (symbols)
+                       thread__find_symbol(thread, *cpumode, ip, &al);
        }
 
        if (al.sym != NULL) {
@@ -2228,7 +2230,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
                                       struct symbol **parent,
                                       struct addr_location *root_al,
                                       u64 branch_from,
-                                      bool callee, int end)
+                                      bool callee, int end,
+                                      bool symbols)
 {
        struct ip_callchain *chain = sample->callchain;
        u8 cpumode = PERF_RECORD_MISC_USER;
@@ -2238,7 +2241,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
                for (i = 0; i < end + 1; i++) {
                        err = add_callchain_ip(thread, cursor, parent,
                                               root_al, &cpumode, chain->ips[i],
-                                              false, NULL, NULL, branch_from);
+                                              false, NULL, NULL, branch_from,
+                                              symbols);
                        if (err)
                                return err;
                }
@@ -2248,7 +2252,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
        for (i = end; i >= 0; i--) {
                err = add_callchain_ip(thread, cursor, parent,
                                       root_al, &cpumode, chain->ips[i],
-                                      false, NULL, NULL, branch_from);
+                                      false, NULL, NULL, branch_from,
+                                      symbols);
                if (err)
                        return err;
        }
@@ -2291,7 +2296,8 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
                                    struct symbol **parent,
                                    struct addr_location *root_al,
                                    u64 *branch_from,
-                                   bool callee)
+                                   bool callee,
+                                   bool symbols)
 {
        struct branch_stack *lbr_stack = sample->branch_stack;
        struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -2324,7 +2330,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
                err = add_callchain_ip(thread, cursor, parent,
                                       root_al, &cpumode, ip,
                                       true, flags, NULL,
-                                      *branch_from);
+                                      *branch_from, symbols);
                if (err)
                        return err;
 
@@ -2349,7 +2355,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
                        err = add_callchain_ip(thread, cursor, parent,
                                               root_al, &cpumode, ip,
                                               true, flags, NULL,
-                                              *branch_from);
+                                              *branch_from, symbols);
                        if (err)
                                return err;
                        save_lbr_cursor_node(thread, cursor, i);
@@ -2364,7 +2370,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
                err = add_callchain_ip(thread, cursor, parent,
                                       root_al, &cpumode, ip,
                                       true, flags, NULL,
-                                      *branch_from);
+                                      *branch_from, symbols);
                if (err)
                        return err;
                save_lbr_cursor_node(thread, cursor, i);
@@ -2378,7 +2384,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
                err = add_callchain_ip(thread, cursor, parent,
                                root_al, &cpumode, ip,
                                true, flags, NULL,
-                               *branch_from);
+                               *branch_from, symbols);
                if (err)
                        return err;
        }
@@ -2545,7 +2551,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
                                        struct symbol **parent,
                                        struct addr_location *root_al,
                                        int max_stack,
-                                       unsigned int max_lbr)
+                                       unsigned int max_lbr,
+                                       bool symbols)
 {
        bool callee = (callchain_param.order == ORDER_CALLEE);
        struct ip_callchain *chain = sample->callchain;
@@ -2587,12 +2594,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
                /* Add kernel ip */
                err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
                                                  parent, root_al, branch_from,
-                                                 true, i);
+                                                 true, i, symbols);
                if (err)
                        goto error;
 
                err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
-                                              root_al, &branch_from, true);
+                                              root_al, &branch_from, true, symbols);
                if (err)
                        goto error;
 
@@ -2609,14 +2616,14 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
                                goto error;
                }
                err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
-                                              root_al, &branch_from, false);
+                                              root_al, &branch_from, false, symbols);
                if (err)
                        goto error;
 
                /* Add kernel ip */
                err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
                                                  parent, root_al, branch_from,
-                                                 false, i);
+                                                 false, i, symbols);
                if (err)
                        goto error;
        }
@@ -2630,7 +2637,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
                             struct callchain_cursor *cursor,
                             struct symbol **parent,
                             struct addr_location *root_al,
-                            u8 *cpumode, int ent)
+                            u8 *cpumode, int ent, bool symbols)
 {
        int err = 0;
 
@@ -2640,7 +2647,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
                if (ip >= PERF_CONTEXT_MAX) {
                        err = add_callchain_ip(thread, cursor, parent,
                                               root_al, cpumode, ip,
-                                              false, NULL, NULL, 0);
+                                              false, NULL, NULL, 0, symbols);
                        break;
                }
        }
@@ -2662,7 +2669,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
                                            struct perf_sample *sample,
                                            struct symbol **parent,
                                            struct addr_location *root_al,
-                                           int max_stack)
+                                           int max_stack,
+                                           bool symbols)
 {
        struct branch_stack *branch = sample->branch_stack;
        struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -2682,7 +2690,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 
                err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
                                                   root_al, max_stack,
-                                                  !env ? 0 : env->max_branches);
+                                                  !env ? 0 : env->max_branches,
+                                                  symbols);
                if (err)
                        return (err < 0) ? err : 0;
        }
@@ -2747,13 +2756,14 @@ static int thread__resolve_callchain_sample(struct thread *thread,
                                               root_al,
                                               NULL, be[i].to,
                                               true, &be[i].flags,
-                                              NULL, be[i].from);
+                                              NULL, be[i].from, symbols);
 
-                       if (!err)
+                       if (!err) {
                                err = add_callchain_ip(thread, cursor, parent, root_al,
                                                       NULL, be[i].from,
                                                       true, &be[i].flags,
-                                                      &iter[i], 0);
+                                                      &iter[i], 0, symbols);
+                       }
                        if (err == -EINVAL)
                                break;
                        if (err)
@@ -2769,7 +2779,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 check_calls:
        if (chain && callchain_param.order != ORDER_CALLEE) {
                err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
-                                       &cpumode, chain->nr - first_call);
+                                       &cpumode, chain->nr - first_call, symbols);
                if (err)
                        return (err < 0) ? err : 0;
        }
@@ -2791,7 +2801,7 @@ check_calls:
                        ++nr_entries;
                else if (callchain_param.order != ORDER_CALLEE) {
                        err = find_prev_cpumode(chain, thread, cursor, parent,
-                                               root_al, &cpumode, j);
+                                               root_al, &cpumode, j, symbols);
                        if (err)
                                return (err < 0) ? err : 0;
                        continue;
@@ -2818,8 +2828,8 @@ check_calls:
                        if (leaf_frame_caller && leaf_frame_caller != ip) {
 
                                err = add_callchain_ip(thread, cursor, parent,
-                                              root_al, &cpumode, leaf_frame_caller,
-                                              false, NULL, NULL, 0);
+                                               root_al, &cpumode, leaf_frame_caller,
+                                               false, NULL, NULL, 0, symbols);
                                if (err)
                                        return (err < 0) ? err : 0;
                        }
@@ -2827,7 +2837,7 @@ check_calls:
 
                err = add_callchain_ip(thread, cursor, parent,
                                       root_al, &cpumode, ip,
-                                      false, NULL, NULL, 0);
+                                      false, NULL, NULL, 0, symbols);
 
                if (err)
                        return (err < 0) ? err : 0;
@@ -2907,7 +2917,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
                                            struct callchain_cursor *cursor,
                                            struct evsel *evsel,
                                            struct perf_sample *sample,
-                                           int max_stack)
+                                           int max_stack, bool symbols)
 {
        /* Can we do dwarf post unwind? */
        if (!((evsel->core.attr.sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -2919,17 +2929,21 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
            (!sample->user_stack.size))
                return 0;
 
+       if (!symbols)
+               pr_debug("Not resolving symbols with an unwinder isn't currently supported\n");
+
        return unwind__get_entries(unwind_entry, cursor,
                                   thread, sample, max_stack, false);
 }
 
-int thread__resolve_callchain(struct thread *thread,
-                             struct callchain_cursor *cursor,
-                             struct evsel *evsel,
-                             struct perf_sample *sample,
-                             struct symbol **parent,
-                             struct addr_location *root_al,
-                             int max_stack)
+int __thread__resolve_callchain(struct thread *thread,
+                               struct callchain_cursor *cursor,
+                               struct evsel *evsel,
+                               struct perf_sample *sample,
+                               struct symbol **parent,
+                               struct addr_location *root_al,
+                               int max_stack,
+                               bool symbols)
 {
        int ret = 0;
 
@@ -2942,22 +2956,22 @@ int thread__resolve_callchain(struct thread *thread,
                ret = thread__resolve_callchain_sample(thread, cursor,
                                                       evsel, sample,
                                                       parent, root_al,
-                                                      max_stack);
+                                                      max_stack, symbols);
                if (ret)
                        return ret;
                ret = thread__resolve_callchain_unwind(thread, cursor,
                                                       evsel, sample,
-                                                      max_stack);
+                                                      max_stack, symbols);
        } else {
                ret = thread__resolve_callchain_unwind(thread, cursor,
                                                       evsel, sample,
-                                                      max_stack);
+                                                      max_stack, symbols);
                if (ret)
                        return ret;
                ret = thread__resolve_callchain_sample(thread, cursor,
                                                       evsel, sample,
                                                       parent, root_al,
-                                                      max_stack);
+                                                      max_stack, symbols);
        }
 
        return ret;
index a687876e3453d4f671d2e838be4bdfe1b40067da..2e5a4cb342d82c2261ec0500a4fd71090bacbe7f 100644 (file)
@@ -178,13 +178,32 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 
 struct callchain_cursor;
 
-int thread__resolve_callchain(struct thread *thread,
-                             struct callchain_cursor *cursor,
-                             struct evsel *evsel,
-                             struct perf_sample *sample,
-                             struct symbol **parent,
-                             struct addr_location *root_al,
-                             int max_stack);
+int __thread__resolve_callchain(struct thread *thread,
+                               struct callchain_cursor *cursor,
+                               struct evsel *evsel,
+                               struct perf_sample *sample,
+                               struct symbol **parent,
+                               struct addr_location *root_al,
+                               int max_stack,
+                               bool symbols);
+
+static inline int thread__resolve_callchain(struct thread *thread,
+                                           struct callchain_cursor *cursor,
+                                           struct evsel *evsel,
+                                           struct perf_sample *sample,
+                                           struct symbol **parent,
+                                           struct addr_location *root_al,
+                                           int max_stack)
+{
+       return __thread__resolve_callchain(thread,
+                                          cursor,
+                                          evsel,
+                                          sample,
+                                          parent,
+                                          root_al,
+                                          max_stack,
+                                          /*symbols=*/true);
+}
 
 /*
  * Default guest kernel is defined by parameter --guestkallsyms