perf/x86: Track pmu in per-CPU cpu_hw_events
author Kan Liang <kan.liang@linux.intel.com>
Mon, 12 Apr 2021 14:30:43 +0000 (07:30 -0700)
committer Sasha Levin <sashal@kernel.org>
Wed, 30 Jun 2021 12:47:24 +0000 (08:47 -0400)
[ Upstream commit 61e76d53c39bb768ad264d379837cfc56b9e35b4 ]

Some platforms, e.g. Alder Lake, have a hybrid architecture. In the same
package, there may be more than one type of CPU. The PMU capabilities
differ among the different types of CPU. Perf will register a
dedicated PMU for each type of CPU.

Add a 'pmu' variable in the struct cpu_hw_events to track the dedicated
PMU of the current CPU.

The current x86_get_pmu() uses the global 'pmu', which would break on a
hybrid platform. Modify it to return the 'pmu' of the specified CPU.

Initialize the per-CPU 'pmu' variable with the global 'pmu'. Nothing
changes for non-hybrid platforms.

is_x86_event() will be updated in a later patch ("perf/x86:
Register hybrid PMUs") for hybrid platforms. For non-hybrid
platforms, nothing is changed here.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/1618237865-33448-4-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/events/intel/lbr.c
arch/x86/events/perf_event.h

index b7f8ed87bfbc31c9c8edb0e40fce03786e53e4aa..e6db1a1f22d7d60d7e8e81abce3b12d5be7da997 100644 (file)
 #include "perf_event.h"
 
 struct x86_pmu x86_pmu __read_mostly;
+static struct pmu pmu;
 
 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
        .enabled = 1,
+       .pmu = &pmu,
 };
 
 DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
@@ -712,16 +714,23 @@ void x86_pmu_enable_all(int added)
        }
 }
 
-static struct pmu pmu;
-
 static inline int is_x86_event(struct perf_event *event)
 {
        return event->pmu == &pmu;
 }
 
-struct pmu *x86_get_pmu(void)
+struct pmu *x86_get_pmu(unsigned int cpu)
 {
-       return &pmu;
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+       /*
+        * All CPUs of the hybrid type have been offline.
+        * The x86_get_pmu() should not be invoked.
+        */
+       if (WARN_ON_ONCE(!cpuc->pmu))
+               return &pmu;
+
+       return cpuc->pmu;
 }
 /*
  * Event scheduler state:
index ee659b5faf71460ec7a7d1db530b6d0dd13ee64e..3b8b8eede1a8aa7862209f3f216fb2aa53cf6d16 100644 (file)
@@ -4747,7 +4747,7 @@ static void update_tfa_sched(void *ignored)
         * and if so force schedule out for all event types all contexts
         */
        if (test_bit(3, cpuc->active_mask))
-               perf_pmu_resched(x86_get_pmu());
+               perf_pmu_resched(x86_get_pmu(smp_processor_id()));
 }
 
 static ssize_t show_sysctl_tfa(struct device *cdev,
index 31a7a6566d077b3af5c019f7a5fb974b134bbc0b..945d470f62d0fcc09e4fd525fcf9f067e2bb745b 100644 (file)
@@ -2076,7 +2076,7 @@ void __init intel_ds_init(void)
                                        PERF_SAMPLE_TIME;
                                x86_pmu.flags |= PMU_FL_PEBS_ALL;
                                pebs_qual = "-baseline";
-                               x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
+                               x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
                        } else {
                                /* Only basic record supported */
                                x86_pmu.large_pebs_flags &=
@@ -2091,7 +2091,7 @@ void __init intel_ds_init(void)
 
                        if (x86_pmu.intel_cap.pebs_output_pt_available) {
                                pr_cont("PEBS-via-PT, ");
-                               x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
+                               x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
                        }
 
                        break;
index 29ec4fe4850715c73a761abae1252975c6440aad..9c1a013d5682297885704ee425127db6d464b741 100644 (file)
@@ -699,7 +699,7 @@ void intel_pmu_lbr_add(struct perf_event *event)
 
 void release_lbr_buffers(void)
 {
-       struct kmem_cache *kmem_cache = x86_get_pmu()->task_ctx_cache;
+       struct kmem_cache *kmem_cache;
        struct cpu_hw_events *cpuc;
        int cpu;
 
@@ -708,6 +708,7 @@ void release_lbr_buffers(void)
 
        for_each_possible_cpu(cpu) {
                cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+               kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
                if (kmem_cache && cpuc->lbr_xsave) {
                        kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
                        cpuc->lbr_xsave = NULL;
@@ -1624,7 +1625,7 @@ void intel_pmu_lbr_init_hsw(void)
        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
 
-       x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+       x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
 
        if (lbr_from_signext_quirk_needed())
                static_branch_enable(&lbr_from_quirk_key);
@@ -1644,7 +1645,7 @@ __init void intel_pmu_lbr_init_skl(void)
        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
 
-       x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+       x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
 
        /*
         * SW branch filter usage:
@@ -1741,7 +1742,7 @@ static bool is_arch_lbr_xsave_available(void)
 
 void __init intel_pmu_arch_lbr_init(void)
 {
-       struct pmu *pmu = x86_get_pmu();
+       struct pmu *pmu = x86_get_pmu(smp_processor_id());
        union cpuid28_eax eax;
        union cpuid28_ebx ebx;
        union cpuid28_ecx ecx;
index d4f2ea2d9a9e24d3e219a22d7008afae02fc5b35..f07d77cffb3c6ae809fa5b457a55499e404b3228 100644 (file)
@@ -326,6 +326,8 @@ struct cpu_hw_events {
        int                             n_pair; /* Large increment events */
 
        void                            *kfree_on_online[X86_PERF_KFREE_MAX];
+
+       struct pmu                      *pmu;
 };
 
 #define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) {        \
@@ -897,7 +899,7 @@ static struct perf_pmu_events_ht_attr event_attr_##v = {            \
        .event_str_ht   = ht,                                           \
 }
 
-struct pmu *x86_get_pmu(void);
+struct pmu *x86_get_pmu(unsigned int cpu);
 extern struct x86_pmu x86_pmu __read_mostly;
 
 static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)