Merge tag 'perf_urgent_for_v6.4_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 14 May 2023 14:56:51 +0000 (07:56 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 14 May 2023 14:56:51 +0000 (07:56 -0700)
Pull perf fixes from Borislav Petkov:

 - Make sure the PEBS buffer is flushed before reprogramming the
   hardware so that the correct record sizes are used

 - Update the sample size for AMD BRS events

 - Fix the reuse of the same on-stack perf_sample_data struct for
   different events in the event processing path (a short sketch of
   the pattern follows the tag list below)

* tag 'perf_urgent_for_v6.4_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel/ds: Flush PEBS DS when changing PEBS_DATA_CFG
  perf/x86: Fix missing sample size update on AMD BRS
  perf/core: Fix perf_sample_data not properly initialized for different swevents in perf_tp_event()

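To make the third fix above easier to follow, here is a minimal, self-contained sketch of the pattern it corrects (see the kernel/events/core.c hunk at the bottom). Nothing below is kernel code: the struct, the flag and the event ids are invented for illustration. The point is that a lazily-filled, flag-guarded structure reused across loop iterations must have its flags reset for every iteration, otherwise the next consumer sees data prepared for the previous event and skips recomputing its own.

#include <stdio.h>
#include <stdint.h>

#define SAMPLE_CALLCHAIN (1u << 0)	/* hypothetical "this field is valid" flag */

struct sample_data {
	uint32_t sample_flags;		/* which fields have been prepared */
	uint64_t callchain_for;		/* event id the callchain was built for */
};

static void sample_init(struct sample_data *d)
{
	d->sample_flags = 0;		/* nothing prepared yet */
}

/* Fill event-specific fields only when they are not already marked valid. */
static void prepare_sample(struct sample_data *d, uint64_t event_id)
{
	if (!(d->sample_flags & SAMPLE_CALLCHAIN)) {
		d->callchain_for = event_id;
		d->sample_flags |= SAMPLE_CALLCHAIN;
	}
}

int main(void)
{
	uint64_t events[] = { 1, 2 };
	struct sample_data data;	/* one on-stack struct shared by all events */

	sample_init(&data);
	for (unsigned int i = 0; i < 2; i++) {
		prepare_sample(&data, events[i]);
		printf("event %llu consumes data prepared for event %llu\n",
		       (unsigned long long)events[i],
		       (unsigned long long)data.callchain_for);
		/* The fix: reset the flags so the next event rebuilds its data. */
		sample_init(&data);
	}
	return 0;
}
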
arch/x86/events/core.c
arch/x86/events/intel/ds.c
arch/x86/include/asm/perf_event.h
kernel/events/core.c

arch/x86/events/core.c
index d096b04bf80e80b2c9784cd5380dbe7677f45cd9..9d248703cbddcd6fb7623d22f4a408adee8d97d5 100644
@@ -1703,10 +1703,8 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 
                perf_sample_data_init(&data, 0, event->hw.last_period);
 
-               if (has_branch_stack(event)) {
-                       data.br_stack = &cpuc->lbr_stack;
-                       data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
-               }
+               if (has_branch_stack(event))
+                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
 
                if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
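
The hunk above is the AMD BRS fix from the second bullet: the removed open-coded assignment only attached the branch stack and set the flag, while the perf_sample_save_brstack() helper additionally accounts for the branch entries in the dynamic sample size, so the emitted sample header matches the bytes actually written. Below is a simplified stand-in for such a helper, with invented types and flag values rather than the kernel's definitions:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct branch_entry { uint64_t from, to, flags; };

struct branch_stack {
	uint64_t nr;			/* number of entries that follow */
	struct branch_entry entries[];
};

#define SAMPLE_BRANCH_STACK (1ULL << 11)	/* invented flag value */

struct sample_data {
	uint64_t sample_flags;
	uint64_t dyn_size;		/* variable part of the sample size */
	struct branch_stack *br_stack;
};

/* Attach a branch stack and account for it in the sample size. */
static void sample_save_brstack(struct sample_data *data, struct branch_stack *brs)
{
	data->br_stack = brs;
	data->sample_flags |= SAMPLE_BRANCH_STACK;
	/* the nr word plus the entries themselves */
	data->dyn_size += sizeof(uint64_t) + brs->nr * sizeof(struct branch_entry);
}

int main(void)
{
	struct branch_stack *brs = malloc(sizeof(*brs) + 2 * sizeof(struct branch_entry));
	struct sample_data data = { 0 };

	if (!brs)
		return 1;
	brs->nr = 2;
	sample_save_brstack(&data, brs);
	printf("sample grows by %llu bytes\n", (unsigned long long)data.dyn_size);
	free(brs);
	return 0;
}
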
arch/x86/events/intel/ds.c
index a2e566e53076ec83daa42f4cc91609ae11705cfa..df88576d6b2a54bbf00ffc5dd5c71cdca7213981 100644
@@ -1229,12 +1229,14 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
                  struct perf_event *event, bool add)
 {
        struct pmu *pmu = event->pmu;
+
        /*
         * Make sure we get updated with the first PEBS
         * event. It will trigger also during removal, but
         * that does not hurt:
         */
-       bool update = cpuc->n_pebs == 1;
+       if (cpuc->n_pebs == 1)
+               cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
 
        if (needed_cb != pebs_needs_sched_cb(cpuc)) {
                if (!needed_cb)
@@ -1242,7 +1244,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
                else
                        perf_sched_cb_dec(pmu);
 
-               update = true;
+               cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
        }
 
        /*
@@ -1252,24 +1254,13 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
        if (x86_pmu.intel_cap.pebs_baseline && add) {
                u64 pebs_data_cfg;
 
-               /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
-               if (cpuc->n_pebs == 1) {
-                       cpuc->pebs_data_cfg = 0;
-                       cpuc->pebs_record_size = sizeof(struct pebs_basic);
-               }
-
                pebs_data_cfg = pebs_update_adaptive_cfg(event);
-
-               /* Update pebs_record_size if new event requires more data. */
-               if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
-                       cpuc->pebs_data_cfg |= pebs_data_cfg;
-                       adaptive_pebs_record_size_update();
-                       update = true;
-               }
+               /*
+                * Be sure to update the thresholds when we change the record.
+                */
+               if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
+                       cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
        }
-
-       if (update)
-               pebs_update_threshold(cpuc);
 }
 
 void intel_pmu_pebs_add(struct perf_event *event)
@@ -1326,9 +1317,17 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
        wrmsrl(base + idx, value);
 }
 
+static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
+{
+       if (cpuc->n_pebs == cpuc->n_large_pebs &&
+           cpuc->n_pebs != cpuc->n_pebs_via_pt)
+               intel_pmu_drain_pebs_buffer();
+}
+
 void intel_pmu_pebs_enable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
        struct hw_perf_event *hwc = &event->hw;
        struct debug_store *ds = cpuc->ds;
        unsigned int idx = hwc->idx;
@@ -1344,11 +1343,22 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 
        if (x86_pmu.intel_cap.pebs_baseline) {
                hwc->config |= ICL_EVENTSEL_ADAPTIVE;
-               if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
-                       wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
-                       cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
+               if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
+                       /*
+                        * drain_pebs() assumes uniform record size;
+                        * hence we need to drain when changing said
+                        * size.
+                        */
+                       intel_pmu_drain_large_pebs(cpuc);
+                       adaptive_pebs_record_size_update();
+                       wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg);
+                       cpuc->active_pebs_data_cfg = pebs_data_cfg;
                }
        }
+       if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
+               cpuc->pebs_data_cfg = pebs_data_cfg;
+               pebs_update_threshold(cpuc);
+       }
 
        if (idx >= INTEL_PMC_IDX_FIXED) {
                if (x86_pmu.intel_cap.pebs_format < 5)
@@ -1391,9 +1401,7 @@ void intel_pmu_pebs_disable(struct perf_event *event)
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
 
-       if (cpuc->n_pebs == cpuc->n_large_pebs &&
-           cpuc->n_pebs != cpuc->n_pebs_via_pt)
-               intel_pmu_drain_pebs_buffer();
+       intel_pmu_drain_large_pebs(cpuc);
 
        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
 
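The ds.c changes above implement the first bullet and come down to one rule: a buffer of fixed-size records has to be drained while the old record size is still programmed, because the consumer walks the buffer in record-size strides; only then may the size be changed. In the hunks above, intel_pmu_drain_large_pebs() plays that role before MSR_PEBS_DATA_CFG is rewritten. Here is a minimal sketch of the rule with an invented buffer and record layout (nothing here is the PEBS format):

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h>

static uint8_t buf[256];		/* stand-in for the DS area holding raw records */
static size_t buf_used;			/* bytes currently queued */
static size_t record_size = 8;		/* size the producer used for the queued records */

/* Consume every queued record, stepping through the buffer in record_size strides. */
static void drain_buffer(void)
{
	for (size_t off = 0; off + record_size <= buf_used; off += record_size)
		printf("record at offset %zu, %zu bytes\n", off, record_size);
	buf_used = 0;
}

/* Changing the record size is only safe once the old-sized records are gone. */
static void set_record_size(size_t new_size)
{
	if (buf_used)
		drain_buffer();		/* analogous to intel_pmu_drain_large_pebs() */
	record_size = new_size;
}

int main(void)
{
	memset(buf, 0, sizeof(buf));
	buf_used = 3 * record_size;	/* three 8-byte records already queued */
	set_record_size(16);		/* drain them first, then switch to 16-byte records */
	return 0;
}
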
arch/x86/include/asm/perf_event.h
index 8fc15ed5e60bb627db5226bf6e966b9294ae44ab..abf09882f58b67c20a622a98ced522961b818931 100644
 #define PEBS_DATACFG_LBRS      BIT_ULL(3)
 #define PEBS_DATACFG_LBR_SHIFT 24
 
+/* Steal the highest bit of pebs_data_cfg for SW usage */
+#define PEBS_UPDATE_DS_SW      BIT_ULL(63)
+
 /*
  * Intel "Architectural Performance Monitoring" CPUID
  * detection/enumeration details:
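
PEBS_UPDATE_DS_SW, added in the hunk above, is a software-only flag parked in a bit the hardware does not define: it rides along in pebs_data_cfg to remember that the DS thresholds need recomputing, and is masked off before the value is written to MSR_PEBS_DATA_CFG. Below is a sketch of that pattern with invented names (write_hw_config() merely stands in for the wrmsrl() call):

#include <stdio.h>
#include <stdint.h>

#define CFG_SW_UPDATE (1ULL << 63)	/* software-only: "recompute the thresholds" */

static void write_hw_config(uint64_t cfg)
{
	/* Stand-in for the MSR write: the hardware must never see the SW bit. */
	printf("programming hardware with %#llx\n", (unsigned long long)cfg);
}

int main(void)
{
	uint64_t cfg = 0x3ULL | CFG_SW_UPDATE;	/* hardware bits plus the SW bookkeeping bit */

	write_hw_config(cfg & ~CFG_SW_UPDATE);	/* hardware view: SW bit masked off */

	if (cfg & CFG_SW_UPDATE) {		/* software view: act on the flag... */
		printf("recomputing software-side thresholds\n");
		cfg &= ~CFG_SW_UPDATE;		/* ...and clear it once done */
	}
	return 0;
}
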
kernel/events/core.c
index 68baa8194d9f8b30694238d0db88d8ece579efb9..db016e4189319936b1042a336b6ad76046398eee 100644
@@ -10150,8 +10150,20 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
        perf_trace_buf_update(record, event_type);
 
        hlist_for_each_entry_rcu(event, head, hlist_entry) {
-               if (perf_tp_event_match(event, &data, regs))
+               if (perf_tp_event_match(event, &data, regs)) {
                        perf_swevent_event(event, count, &data, regs);
+
+                       /*
+                        * The same on-stack perf_sample_data is reused here, but
+                        * some of its members are event-specific and need to be
+                        * re-computed for each swevent. Re-initialize
+                        * data->sample_flags so that the next event does not
+                        * skip preparing its data just because the flags are
+                        * already set.
+                        */
+                       perf_sample_data_init(&data, 0, 0);
+                       perf_sample_save_raw_data(&data, &raw);
+               }
        }
 
        /*