perf: Fix perf_event_pmu_context serialization
author    James Clark <james.clark@arm.com>
          Fri, 27 Jan 2023 14:31:41 +0000 (14:31 +0000)
committer Peter Zijlstra <peterz@infradead.org>
          Tue, 31 Jan 2023 19:37:18 +0000 (20:37 +0100)
Syzkaller triggered a WARN in put_pmu_ctx().

  WARNING: CPU: 1 PID: 2245 at kernel/events/core.c:4925 put_pmu_ctx+0x1f0/0x278

This is because nothing serializes the "if (!epc->ctx)" check in
find_get_pmu_context() against epc->ctx being set to NULL in
put_pmu_ctx().

The decrement of the reference count in put_pmu_ctx() also happens
outside of the spinlock, making the following interleaving possible, in
which the context is cleared in put_pmu_ctx() even though its refcount
has been raised back to non-zero (a sketch of the pattern that closes
this window follows the diagram):

 CPU0                                   CPU1
 find_get_pmu_context()
   if (!epc->ctx) == false
                                        put_pmu_ctx()
                                        atomic_dec_and_test(&epc->refcount) == true
                                        epc->refcount == 0
     atomic_inc(&epc->refcount);
     epc->refcount == 1
                                        list_del_init(&epc->pmu_ctx_entry);
                                        epc->ctx = NULL;
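
The fix folds the final decrement and the ctx->lock acquisition into a
single operation, so the refcount can only drop to zero with the lock
held; a concurrent find_get_pmu_context(), which now also takes
ctx->lock around its check, then either sees a non-zero refcount or an
already unlinked epc. A minimal sketch of that dec-and-raw-lock pattern
(it mirrors the lib/dec_and_lock.c hunk below; the function name here
is only for illustration):

  static int dec_and_raw_lock_irqsave_sketch(atomic_t *cnt,
                                             raw_spinlock_t *lock,
                                             unsigned long *flags)
  {
          /* Fast path: drop a reference unless it is the last one. */
          if (atomic_add_unless(cnt, -1, 1))
                  return 0;

          /* Slow path: the final decrement only happens under the lock. */
          raw_spin_lock_irqsave(lock, *flags);
          if (atomic_dec_and_test(cnt))
                  return 1;       /* caller now holds the lock */
          raw_spin_unlock_irqrestore(lock, *flags);
          return 0;
  }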

Another issue is that the WARN_ON for no active PMU events in
put_pmu_ctx() sits outside of the lock. If the perf_event_pmu_context
is an embedded one it is not freed even after being cleared, so it can
be re-used and the warning can trigger spuriously. For this reason it
also needs to be moved inside the lock.
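
With both changes, the tear-down ordering in put_pmu_ctx() roughly
becomes (a sketch that mirrors the kernel/events/core.c hunk below; the
RCU free of a non-embedded epc is elided):

  static void put_pmu_ctx(struct perf_event_pmu_context *epc)
  {
          struct perf_event_context *ctx = epc->ctx;
          unsigned long flags;

          /* The refcount can only drop to zero with ctx->lock held. */
          if (!atomic_dec_and_raw_lock_irqsave(&epc->refcount, &ctx->lock, flags))
                  return;

          /* Checks and unlink happen before anyone can re-find the epc. */
          WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry));
          list_del_init(&epc->pmu_ctx_entry);
          epc->ctx = NULL;

          WARN_ON_ONCE(!list_empty(&epc->pinned_active));
          WARN_ON_ONCE(!list_empty(&epc->flexible_active));

          raw_spin_unlock_irqrestore(&ctx->lock, flags);

          /* An embedded epc lives in the per-CPU context and is never freed. */
          if (epc->embedded)
                  return;

          /* ... RCU-free a dynamically allocated epc ... */
  }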

The above warning is very quick to trigger on Arm by running these two
commands at the same time:

  while true; do perf record -- ls; done
  while true; do perf record -- ls; done

[peterz: atomic_dec_and_raw_lock*()]
Fixes: bd2756811766 ("perf: Rewrite core context handling")
Reported-by: syzbot+697196bc0265049822bd@syzkaller.appspotmail.com
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230127143141.1782804-2-james.clark@arm.com
include/linux/spinlock.h
kernel/events/core.c
lib/dec_and_lock.c

include/linux/spinlock.h
index 1341f7d62da44663d48630991e7455e364430b63..be48f1cb1878ba271ef397d36c49feb9ea976a6b 100644
@@ -476,6 +476,15 @@ extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
 #define atomic_dec_and_lock_irqsave(atomic, lock, flags) \
                __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)))
 
+extern int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock);
+#define atomic_dec_and_raw_lock(atomic, lock) \
+               __cond_lock(lock, _atomic_dec_and_raw_lock(atomic, lock))
+
+extern int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock,
+                                       unsigned long *flags);
+#define atomic_dec_and_raw_lock_irqsave(atomic, lock, flags) \
+               __cond_lock(lock, _atomic_dec_and_raw_lock_irqsave(atomic, lock, &(flags)))
+
 int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
                             size_t max_size, unsigned int cpu_mult,
                             gfp_t gfp, const char *name,
kernel/events/core.c
index d56328e5080e9814e2d0b3f045a3d8d2a7f51c3f..c4be13e50547bf57d16a7454bb187449dd4ee1b5 100644
@@ -4813,19 +4813,17 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
 
                cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
                epc = &cpc->epc;
-
+               raw_spin_lock_irq(&ctx->lock);
                if (!epc->ctx) {
                        atomic_set(&epc->refcount, 1);
                        epc->embedded = 1;
-                       raw_spin_lock_irq(&ctx->lock);
                        list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
                        epc->ctx = ctx;
-                       raw_spin_unlock_irq(&ctx->lock);
                } else {
                        WARN_ON_ONCE(epc->ctx != ctx);
                        atomic_inc(&epc->refcount);
                }
-
+               raw_spin_unlock_irq(&ctx->lock);
                return epc;
        }
 
@@ -4896,33 +4894,30 @@ static void free_epc_rcu(struct rcu_head *head)
 
 static void put_pmu_ctx(struct perf_event_pmu_context *epc)
 {
+       struct perf_event_context *ctx = epc->ctx;
        unsigned long flags;
 
-       if (!atomic_dec_and_test(&epc->refcount))
+       /*
+        * XXX
+        *
+        * lockdep_assert_held(&ctx->mutex);
+        *
+        * can't because of the call-site in _free_event()/put_event()
+        * which isn't always called under ctx->mutex.
+        */
+       if (!atomic_dec_and_raw_lock_irqsave(&epc->refcount, &ctx->lock, flags))
                return;
 
-       if (epc->ctx) {
-               struct perf_event_context *ctx = epc->ctx;
+       WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry));
 
-               /*
-                * XXX
-                *
-                * lockdep_assert_held(&ctx->mutex);
-                *
-                * can't because of the call-site in _free_event()/put_event()
-                * which isn't always called under ctx->mutex.
-                */
-
-               WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry));
-               raw_spin_lock_irqsave(&ctx->lock, flags);
-               list_del_init(&epc->pmu_ctx_entry);
-               epc->ctx = NULL;
-               raw_spin_unlock_irqrestore(&ctx->lock, flags);
-       }
+       list_del_init(&epc->pmu_ctx_entry);
+       epc->ctx = NULL;
 
        WARN_ON_ONCE(!list_empty(&epc->pinned_active));
        WARN_ON_ONCE(!list_empty(&epc->flexible_active));
 
+       raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
        if (epc->embedded)
                return;
 
lib/dec_and_lock.c
index 9555b68bb774cc3277dca434d19880286d71df0e..1dcca8f2e194204e63fb8bf443c71b052dfab7ee 100644
@@ -49,3 +49,34 @@ int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
        return 0;
 }
 EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave);
+
+int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock)
+{
+       /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+       if (atomic_add_unless(atomic, -1, 1))
+               return 0;
+
+       /* Otherwise do it the slow way */
+       raw_spin_lock(lock);
+       if (atomic_dec_and_test(atomic))
+               return 1;
+       raw_spin_unlock(lock);
+       return 0;
+}
+EXPORT_SYMBOL(_atomic_dec_and_raw_lock);
+
+int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock,
+                                    unsigned long *flags)
+{
+       /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+       if (atomic_add_unless(atomic, -1, 1))
+               return 0;
+
+       /* Otherwise do it the slow way */
+       raw_spin_lock_irqsave(lock, *flags);
+       if (atomic_dec_and_test(atomic))
+               return 1;
+       raw_spin_unlock_irqrestore(lock, *flags);
+       return 0;
+}
+EXPORT_SYMBOL(_atomic_dec_and_raw_lock_irqsave);