From 8882095b1d4d785524a7a4df8e04e35cfd039142 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 24 Jul 2025 09:33:00 -0700 Subject: [PATCH] perf sample: Remove arch notion of sample parsing By definition arch sample parsing and synthesis will inhibit certain kinds of cross-platform record then analysis (report, script, etc.). Remove arch_perf_parse_sample_weight and arch_perf_synthesize_sample_weight replacing with a common implementation. Combine perf_sample p_stage_cyc and retire_lat as weight3 to capture the differing uses regardless of compiled for architecture. Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250724163302.596743-21-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/arch/powerpc/util/event.c | 26 --------------------- tools/perf/arch/x86/tests/sample-parsing.c | 4 ++-- tools/perf/arch/x86/util/event.c | 27 ---------------------- tools/perf/builtin-script.c | 2 +- tools/perf/util/dlfilter.c | 2 +- tools/perf/util/event.h | 2 -- tools/perf/util/evsel.c | 17 ++++++++++---- tools/perf/util/hist.c | 4 ++-- tools/perf/util/hist.h | 3 ++- tools/perf/util/intel-tpebs.c | 4 ++-- tools/perf/util/sample.h | 6 ++--- tools/perf/util/session.c | 2 +- tools/perf/util/sort.c | 7 +++--- tools/perf/util/synthetic-events.c | 10 ++++++-- 14 files changed, 36 insertions(+), 80 deletions(-) diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index 77d8cc2b5691..024ac8b54c33 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -11,32 +11,6 @@ #include "../../../util/debug.h" #include "../../../util/sample.h" -void arch_perf_parse_sample_weight(struct perf_sample *data, - const __u64 *array, u64 type) -{ - union perf_sample_weight weight; - - weight.full = *array; - if (type & PERF_SAMPLE_WEIGHT) - data->weight = weight.full; - else { - data->weight = weight.var1_dw; - data->ins_lat = weight.var2_w; - data->p_stage_cyc = weight.var3_w; - } -} - -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, - __u64 *array, u64 type) -{ - *array = data->weight; - - if (type & PERF_SAMPLE_WEIGHT_STRUCT) { - *array &= 0xffffffff; - *array |= ((u64)data->ins_lat << 32); - } -} - const char *arch_perf_header_entry(const char *se_header) { if (!strcmp(se_header, "Local INSTR Latency")) diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c index a061e8619267..22feec23e53d 100644 --- a/tools/perf/arch/x86/tests/sample-parsing.c +++ b/tools/perf/arch/x86/tests/sample-parsing.c @@ -29,7 +29,7 @@ static bool samples_same(const struct perf_sample *s1, { if (type & PERF_SAMPLE_WEIGHT_STRUCT) { COMP(ins_lat); - COMP(retire_lat); + COMP(weight3); } return true; @@ -50,7 +50,7 @@ static int do_test(u64 sample_type) struct perf_sample sample = { .weight = 101, .ins_lat = 102, - .retire_lat = 103, + .weight3 = 103, }; struct perf_sample sample_out; size_t i, sz, bufsz; diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index a0400707180c..576c1c36046c 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -92,33 +92,6 @@ int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool, #endif -void arch_perf_parse_sample_weight(struct perf_sample *data, - const __u64 *array, u64 type) -{ - union perf_sample_weight weight; - - weight.full = *array; - if (type & PERF_SAMPLE_WEIGHT) - data->weight = weight.full; - else { - data->weight = weight.var1_dw; - data->ins_lat = weight.var2_w; - data->retire_lat = weight.var3_w; - } -} - -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, - __u64 *array, u64 type) -{ - *array = data->weight; - - if (type & PERF_SAMPLE_WEIGHT_STRUCT) { - *array &= 0xffffffff; - *array |= ((u64)data->ins_lat << 32); - *array |= ((u64)data->retire_lat << 48); - } -} - const char *arch_perf_header_entry(const char *se_header) { if (!strcmp(se_header, "Local Pipeline Stage Cycle")) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f2b5620165b4..d9fbdcf72f25 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2252,7 +2252,7 @@ static void process_event(struct perf_script *script, fprintf(fp, "%16" PRIu16, sample->ins_lat); if (PRINT_FIELD(RETIRE_LAT)) - fprintf(fp, "%16" PRIu16, sample->retire_lat); + fprintf(fp, "%16" PRIu16, sample->weight3); if (PRINT_FIELD(CGROUP)) { const char *cgrp_name; diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index ddacef881af2..c0afcbd954f8 100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -513,6 +513,7 @@ int dlfilter__do_filter_event(struct dlfilter *d, d->d_addr_al = &d_addr_al; d_sample.size = sizeof(d_sample); + d_sample.p_stage_cyc = sample->weight3; d_ip_al.size = 0; /* To indicate d_ip_al is not initialized */ d_addr_al.size = 0; /* To indicate d_addr_al is not initialized */ @@ -526,7 +527,6 @@ int dlfilter__do_filter_event(struct dlfilter *d, ASSIGN(period); ASSIGN(weight); ASSIGN(ins_lat); - ASSIGN(p_stage_cyc); ASSIGN(transaction); ASSIGN(insn_cnt); ASSIGN(cyc_cnt); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 67ad4a2014bc..b13385a6068b 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -391,8 +391,6 @@ extern unsigned int proc_map_timeout; #define PAGE_SIZE_NAME_LEN 32 char *get_page_size_name(u64 size, char *str); -void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); const char *arch_perf_header_entry(const char *se_header); int arch_support_sort_key(const char *sort_key); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index aa6efcc4404c..3d27e9bdd66b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2880,11 +2880,18 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size) return 0; } -void __weak arch_perf_parse_sample_weight(struct perf_sample *data, - const __u64 *array, - u64 type __maybe_unused) +static void perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type) { - data->weight = *array; + union perf_sample_weight weight; + + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + data->weight3 = weight.var3_w; + } else { + data->weight = weight.full; + } } u64 evsel__bitfield_swap_branch_flags(u64 value) @@ -3270,7 +3277,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_WEIGHT_TYPE) { OVERFLOW_CHECK_u64(array); - arch_perf_parse_sample_weight(data, array, type); + perf_parse_sample_weight(data, array, type); array++; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index afc6855327ab..64ff427040c3 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -829,7 +829,7 @@ __hists__add_entry(struct hists *hists, .period = sample->period, .weight1 = sample->weight, .weight2 = sample->ins_lat, - .weight3 = sample->p_stage_cyc, + .weight3 = sample->weight3, .latency = al->latency, }, .parent = sym_parent, @@ -846,7 +846,7 @@ __hists__add_entry(struct hists *hists, .time = hist_time(sample->time), .weight = sample->weight, .ins_lat = sample->ins_lat, - .p_stage_cyc = sample->p_stage_cyc, + .weight3 = sample->weight3, .simd_flags = sample->simd_flags, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index c64254088fc7..70438d03ca9c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -255,7 +255,8 @@ struct hist_entry { u64 code_page_size; u64 weight; u64 ins_lat; - u64 p_stage_cyc; + /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */ + u64 weight3; s32 socket; s32 cpu; int parallelism; diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c index 3b92ebf5c112..8c9aee157ec4 100644 --- a/tools/perf/util/intel-tpebs.c +++ b/tools/perf/util/intel-tpebs.c @@ -210,8 +210,8 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused, * latency value will be used. Save the number of samples and the sum of * retire latency value for each event. */ - t->last = sample->retire_lat; - update_stats(&t->stats, sample->retire_lat); + t->last = sample->weight3; + update_stats(&t->stats, sample->weight3); mutex_unlock(tpebs_mtx_get()); return 0; } diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index 0e96240052e9..fae834144ef4 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -104,10 +104,8 @@ struct perf_sample { u8 cpumode; u16 misc; u16 ins_lat; - union { - u16 p_stage_cyc; - u16 retire_lat; - }; + /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */ + u16 weight3; bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2a79e6844f36..26ae078278cd 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1099,7 +1099,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, printf("... weight: %" PRIu64 "", sample->weight); if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { printf(",0x%"PRIx16"", sample->ins_lat); - printf(",0x%"PRIx16"", sample->p_stage_cyc); + printf(",0x%"PRIx16"", sample->weight3); } printf("\n"); } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7969d64a47bf..0ba2ce1b1c07 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1884,21 +1884,20 @@ struct sort_entry sort_global_ins_lat = { static int64_t sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { - return left->p_stage_cyc - right->p_stage_cyc; + return left->weight3 - right->weight3; } static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, - he->p_stage_cyc * he->stat.nr_events); + return repsep_snprintf(bf, size, "%-*u", width, he->weight3 * he->stat.nr_events); } static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); + return repsep_snprintf(bf, size, "%-*u", width, he->weight3); } struct sort_entry sort_local_p_stage_cyc = { diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index e7ca3f5eb493..cb2c1ace304a 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1573,10 +1573,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, return result; } -void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data, +static void perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type __maybe_unused) { *array = data->weight; + + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + *array &= 0xffffffff; + *array |= ((u64)data->ins_lat << 32); + *array |= ((u64)data->weight3 << 48); + } } static __u64 *copy_read_group_values(__u64 *array, __u64 read_format, @@ -1736,7 +1742,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_WEIGHT_TYPE) { - arch_perf_synthesize_sample_weight(sample, array, type); + perf_synthesize_sample_weight(sample, array, type); array++; } -- 2.25.1