perf stat: Add topdown metrics in the default perf stat on the hybrid machine
authorZhengjun Xing <zhengjun.xing@linux.intel.com>
Thu, 21 Jul 2022 06:57:06 +0000 (14:57 +0800)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Fri, 29 Jul 2022 16:43:34 +0000 (13:43 -0300)
Topdown metrics are missing from the default perf stat on the hybrid machine;
add Topdown metrics to the default perf stat for hybrid systems.

Currently, we support the perf metrics Topdown for the p-core PMU in the
perf stat default; the perf metrics Topdown support for the e-core PMU will
be implemented later separately. The refactoring adds two x86-specific
functions. Widen the size of the event name column by 7 chars, so that all
metrics after the "#" become aligned again.

The perf metrics topdown feature is supported on the cpu_core of ADL. The
dedicated perf metrics counter and the fixed counter 3 are used for the
topdown events. Adding the topdown metrics doesn't trigger multiplexing.

Before:

 # ./perf  stat  -a true

 Performance counter stats for 'system wide':

             53.70 msec cpu-clock                 #   25.736 CPUs utilized
                80      context-switches          #    1.490 K/sec
                24      cpu-migrations            #  446.951 /sec
                52      page-faults               #  968.394 /sec
         2,788,555      cpu_core/cycles/          #   51.931 M/sec
           851,129      cpu_atom/cycles/          #   15.851 M/sec
         2,974,030      cpu_core/instructions/    #   55.385 M/sec
           416,919      cpu_atom/instructions/    #    7.764 M/sec
           586,136      cpu_core/branches/        #   10.916 M/sec
            79,872      cpu_atom/branches/        #    1.487 M/sec
            14,220      cpu_core/branch-misses/   #  264.819 K/sec
             7,691      cpu_atom/branch-misses/   #  143.229 K/sec

       0.002086438 seconds time elapsed

After:

 # ./perf stat  -a true

 Performance counter stats for 'system wide':

             61.39 msec cpu-clock                        #   24.874 CPUs utilized
                76      context-switches                 #    1.238 K/sec
                24      cpu-migrations                   #  390.968 /sec
                52      page-faults                      #  847.097 /sec
         2,753,695      cpu_core/cycles/                 #   44.859 M/sec
           903,899      cpu_atom/cycles/                 #   14.725 M/sec
         2,927,529      cpu_core/instructions/           #   47.690 M/sec
           428,498      cpu_atom/instructions/           #    6.980 M/sec
           581,299      cpu_core/branches/               #    9.470 M/sec
            83,409      cpu_atom/branches/               #    1.359 M/sec
            13,641      cpu_core/branch-misses/          #  222.216 K/sec
             8,008      cpu_atom/branch-misses/          #  130.453 K/sec
        14,761,308      cpu_core/slots/                  #  240.466 M/sec
         3,288,625      cpu_core/topdown-retiring/       #     22.3% retiring
         1,323,323      cpu_core/topdown-bad-spec/       #      9.0% bad speculation
         5,477,470      cpu_core/topdown-fe-bound/       #     37.1% frontend bound
         4,679,199      cpu_core/topdown-be-bound/       #     31.7% backend bound
           646,194      cpu_core/topdown-heavy-ops/      #      4.4% heavy operations       #     17.9% light operations
         1,244,999      cpu_core/topdown-br-mispredict/  #      8.4% branch mispredict      #      0.5% machine clears
         3,891,800      cpu_core/topdown-fetch-lat/      #     26.4% fetch latency          #     10.7% fetch bandwidth
         1,879,034      cpu_core/topdown-mem-bound/      #     12.7% memory bound           #     19.0% Core bound

       0.002467839 seconds time elapsed

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220721065706.2886112-6-zhengjun.xing@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/arch/x86/util/evlist.c
tools/perf/arch/x86/util/topdown.c
tools/perf/arch/x86/util/topdown.h
tools/perf/builtin-stat.c
tools/perf/util/stat-display.c
tools/perf/util/topdown.c
tools/perf/util/topdown.h

index c83f8c11735faf3812cc5998a842b206e4dc1e27..cb59ce9b9638046a1eca01b9644bf300f3adc79d 100644 (file)
@@ -3,12 +3,9 @@
 #include "util/pmu.h"
 #include "util/evlist.h"
 #include "util/parse-events.h"
-#include "topdown.h"
 #include "util/event.h"
 #include "util/pmu-hybrid.h"
-
-#define TOPDOWN_L1_EVENTS      "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}"
-#define TOPDOWN_L2_EVENTS      "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}"
+#include "topdown.h"
 
 static int ___evlist__add_default_attrs(struct evlist *evlist,
                                        struct perf_event_attr *attrs,
@@ -65,13 +62,7 @@ int arch_evlist__add_default_attrs(struct evlist *evlist,
        if (nr_attrs)
                return ___evlist__add_default_attrs(evlist, attrs, nr_attrs);
 
-       if (!pmu_have_event("cpu", "slots"))
-               return 0;
-
-       if (pmu_have_event("cpu", "topdown-heavy-ops"))
-               return parse_events(evlist, TOPDOWN_L2_EVENTS, NULL);
-       else
-               return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL);
+       return topdown_parse_events(evlist);
 }
 
 struct evsel *arch_evlist__leader(struct list_head *list)
index f81a7cfe4d633a715248fd756aa72d8afaa7336c..67c5243241256385328ead8479671aaef722ac02 100644 (file)
@@ -3,9 +3,17 @@
 #include "api/fs/fs.h"
 #include "util/pmu.h"
 #include "util/topdown.h"
+#include "util/evlist.h"
+#include "util/debug.h"
+#include "util/pmu-hybrid.h"
 #include "topdown.h"
 #include "evsel.h"
 
+#define TOPDOWN_L1_EVENTS       "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}"
+#define TOPDOWN_L1_EVENTS_CORE  "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/}"
+#define TOPDOWN_L2_EVENTS       "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}"
+#define TOPDOWN_L2_EVENTS_CORE  "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/,cpu_core/topdown-heavy-ops/,cpu_core/topdown-br-mispredict/,cpu_core/topdown-fetch-lat/,cpu_core/topdown-mem-bound/}"
+
 /* Check whether there is a PMU which supports the perf metrics. */
 bool topdown_sys_has_perf_metrics(void)
 {
@@ -73,3 +81,46 @@ bool arch_topdown_sample_read(struct evsel *leader)
 
        return false;
 }
+
+const char *arch_get_topdown_pmu_name(struct evlist *evlist, bool warn)
+{
+       const char *pmu_name;
+
+       if (!perf_pmu__has_hybrid())
+               return "cpu";
+
+       if (!evlist->hybrid_pmu_name) {
+               if (warn)
+                       pr_warning("WARNING: default to use cpu_core topdown events\n");
+               evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core");
+       }
+
+       pmu_name = evlist->hybrid_pmu_name;
+
+       return pmu_name;
+}
+
+int topdown_parse_events(struct evlist *evlist)
+{
+       const char *topdown_events;
+       const char *pmu_name;
+
+       if (!topdown_sys_has_perf_metrics())
+               return 0;
+
+       pmu_name = arch_get_topdown_pmu_name(evlist, false);
+
+       if (pmu_have_event(pmu_name, "topdown-heavy-ops")) {
+               if (!strcmp(pmu_name, "cpu_core"))
+                       topdown_events = TOPDOWN_L2_EVENTS_CORE;
+               else
+                       topdown_events = TOPDOWN_L2_EVENTS;
+       } else {
+               if (!strcmp(pmu_name, "cpu_core"))
+                       topdown_events = TOPDOWN_L1_EVENTS_CORE;
+               else
+                       topdown_events = TOPDOWN_L1_EVENTS;
+       }
+
+       return parse_events(evlist, topdown_events, NULL);
+}
index 46bf9273e572fa5d8f41becabc3ffe834e9a8a7e..7eb81f0428388cf653d6c7ea315d1d759b5a3967 100644 (file)
@@ -3,5 +3,6 @@
 #define _TOPDOWN_H 1
 
 bool topdown_sys_has_perf_metrics(void);
+int topdown_parse_events(struct evlist *evlist);
 
 #endif
index e8c639c2c91d84a8c4093961c51258b345157c9e..3d22543fc305f1006ed27e960c25574638575b7a 100644 (file)
@@ -71,6 +71,7 @@
 #include "util/bpf_counter.h"
 #include "util/iostat.h"
 #include "util/pmu-hybrid.h"
+#include "util/topdown.h"
 #include "asm/bug.h"
 
 #include <linux/time64.h>
@@ -1858,22 +1859,11 @@ static int add_default_attributes(void)
                unsigned int max_level = 1;
                char *str = NULL;
                bool warn = false;
-               const char *pmu_name = "cpu";
+               const char *pmu_name = arch_get_topdown_pmu_name(evsel_list, true);
 
                if (!force_metric_only)
                        stat_config.metric_only = true;
 
-               if (perf_pmu__has_hybrid()) {
-                       if (!evsel_list->hybrid_pmu_name) {
-                               pr_warning("WARNING: default to use cpu_core topdown events\n");
-                               evsel_list->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core");
-                       }
-
-                       pmu_name = evsel_list->hybrid_pmu_name;
-                       if (!pmu_name)
-                               return -1;
-               }
-
                if (pmu_have_event(pmu_name, topdown_metric_L2_attrs[5])) {
                        metric_attrs = topdown_metric_L2_attrs;
                        max_level = 2;
index 606f09b09226f6f8bc23f5761190e014da0d3e67..44045565c8f851eb3b9d1f243b7546e6ea354621 100644 (file)
@@ -374,7 +374,7 @@ static void abs_printout(struct perf_stat_config *config,
                        config->csv_output ? 0 : config->unit_width,
                        evsel->unit, config->csv_sep);
 
-       fprintf(output, "%-*s", config->csv_output ? 0 : 25, evsel__name(evsel));
+       fprintf(output, "%-*s", config->csv_output ? 0 : 32, evsel__name(evsel));
 
        print_cgroup(config, evsel);
 }
index a369f84ceb6a49df312f60765e3394bb32cb6049..1090841550f7bd004725ecec8df7f233e16f5f26 100644 (file)
@@ -65,3 +65,10 @@ __weak bool arch_topdown_sample_read(struct evsel *leader __maybe_unused)
 {
        return false;
 }
+
+__weak const char *arch_get_topdown_pmu_name(struct evlist *evlist
+                                            __maybe_unused,
+                                            bool warn __maybe_unused)
+{
+       return "cpu";
+}
index 118e75281f93310d0af67b405b004613fdd78706..f9531528c559c7b47b03c1eeb704704b7421269c 100644 (file)
@@ -2,11 +2,12 @@
 #ifndef TOPDOWN_H
 #define TOPDOWN_H 1
 #include "evsel.h"
+#include "evlist.h"
 
 bool arch_topdown_check_group(bool *warn);
 void arch_topdown_group_warn(void);
 bool arch_topdown_sample_read(struct evsel *leader);
-
+const char *arch_get_topdown_pmu_name(struct evlist *evlist, bool warn);
 int topdown_filter_events(const char **attr, char **str, bool use_group,
                          const char *pmu_name);