cgroup/rstat: add cgroup_rstat_cpu_lock helpers and tracepoints

author Jesper Dangaard Brouer <hawk@kernel.org>

Wed, 1 May 2024 14:04:11 +0000 (16:04 +0200)

committer Tejun Heo <tj@kernel.org>

Tue, 14 May 2024 19:43:17 +0000 (09:43 -1000)
author Jesper Dangaard Brouer <hawk@kernel.org>
Wed, 1 May 2024 14:04:11 +0000 (16:04 +0200)
committer Tejun Heo <tj@kernel.org>
Tue, 14 May 2024 19:43:17 +0000 (09:43 -1000)
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h

index 13f3758001355c785bce1c490181aedc468ffc4e..0b95865a90f3d82946d4e4f804e7ee60c76effb5 100644 (file)
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -206,15 +206,15 @@ DEFINE_EVENT(cgroup_event, cgroup_notify_frozen,
  
  DECLARE_EVENT_CLASS(cgroup_rstat,
  
-       TP_PROTO(struct cgroup *cgrp, int cpu_in_loop, bool contended),
+       TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
  
-       TP_ARGS(cgrp, cpu_in_loop, contended),
+       TP_ARGS(cgrp, cpu, contended),
  
         TP_STRUCT__entry(
                 __field(        int,            root                    )
                 __field(        int,            level                   )
                 __field(        u64,            id                      )
-               __field(        int,            cpu_in_loop             )
+               __field(        int,            cpu                     )
                 __field(        bool,           contended               )
         ),
  
@@ -222,15 +222,16 @@ DECLARE_EVENT_CLASS(cgroup_rstat,
                 __entry->root = cgrp->root->hierarchy_id;
                 __entry->id = cgroup_id(cgrp);
                 __entry->level = cgrp->level;
-               __entry->cpu_in_loop = cpu_in_loop;
+               __entry->cpu = cpu;
                 __entry->contended = contended;
         ),
  
-       TP_printk("root=%d id=%llu level=%d cpu_in_loop=%d lock contended:%d",
+       TP_printk("root=%d id=%llu level=%d cpu=%d lock contended:%d",
                   __entry->root, __entry->id, __entry->level,
-                 __entry->cpu_in_loop, __entry->contended)
+                 __entry->cpu, __entry->contended)
  );
  
+/* Related to global: cgroup_rstat_lock */
  DEFINE_EVENT(cgroup_rstat, cgroup_rstat_lock_contended,
  
         TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
@@ -252,6 +253,49 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock,
         TP_ARGS(cgrp, cpu, contended)
  );
  
+/*
+ * Related to per CPU: cgroup_rstat_cpu_lock
+ *
+ * All events below reuse the cgroup_rstat event class, so they record the
+ * same (cgrp, cpu, contended) arguments as the global-lock events. Each
+ * event comes in a regular and a *_fastpath flavour; the lock helpers in
+ * kernel/cgroup/rstat.c pick the flavour from their fast_path argument so
+ * that the two kinds of callers can be enabled and traced independently.
+ */
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended,
+
+       TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+       TP_ARGS(cgrp, cpu, contended)
+);
+
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended_fastpath,
+
+       TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+       TP_ARGS(cgrp, cpu, contended)
+);
+
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked,
+
+       TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+       TP_ARGS(cgrp, cpu, contended)
+);
+
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked_fastpath,
+
+       TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+       TP_ARGS(cgrp, cpu, contended)
+);
+
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock,
+
+       TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+       TP_ARGS(cgrp, cpu, contended)
+);
+
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock_fastpath,
+
+       TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+       TP_ARGS(cgrp, cpu, contended)
+);
+
  #endif /* _TRACE_CGROUP_H */
  
  /* This part must be outside protection */
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c

index 52e3b0ed1ceea35d8a74cf98c79f9720060faecf..fb8b494375731baf375dc27f272245966b730a23 100644 (file)
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -19,6 +19,60 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
         return per_cpu_ptr(cgrp->rstat_cpu, cpu);
  }
  
+/*
+ * Helper functions for rstat per CPU lock (cgroup_rstat_cpu_lock).
+ *
+ * This makes it easier to diagnose locking issues and contention in
+ * production environments. The parameter @fast_path determines which
+ * tracepoints are emitted, allowing us to diagnose "flush" related
+ * operations without handling high-frequency fast-path "update" events.
+ *
+ * _cgroup_rstat_cpu_lock() returns the saved IRQ flags; the caller must
+ * hand them back to _cgroup_rstat_cpu_unlock() when releasing @cpu_lock.
+ */
+static __always_inline
+unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
+                                    struct cgroup *cgrp, const bool fast_path)
+{
+       unsigned long flags;
+       bool contended;
+
+       /*
+        * The _irqsave() is needed because cgroup_rstat_lock is
+        * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
+        * this lock with the _irq() suffix only disables interrupts on
+        * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
+        * interrupts on both configurations. The _irqsave() ensures
+        * that interrupts are always disabled and later restored.
+        *
+        * A trylock is attempted first so that a failed attempt can be
+        * reported through the *_lock_contended tracepoint before we
+        * fall back to the blocking acquire.
+        */
+       contended = !raw_spin_trylock_irqsave(cpu_lock, flags);
+       if (contended) {
+               if (fast_path)
+                       trace_cgroup_rstat_cpu_lock_contended_fastpath(cgrp, cpu, contended);
+               else
+                       trace_cgroup_rstat_cpu_lock_contended(cgrp, cpu, contended);
+
+               raw_spin_lock_irqsave(cpu_lock, flags);
+       }
+
+       /* Lock is held here; @contended tells whether the trylock failed. */
+       if (fast_path)
+               trace_cgroup_rstat_cpu_locked_fastpath(cgrp, cpu, contended);
+       else
+               trace_cgroup_rstat_cpu_locked(cgrp, cpu, contended);
+
+       return flags;
+}
+
+/*
+ * Counterpart of _cgroup_rstat_cpu_lock(): emit the unlock tracepoint
+ * selected by @fast_path, then release @cpu_lock and restore the IRQ
+ * state saved in @flags. The tracepoint fires while the lock is still
+ * held and always reports contended=false.
+ */
+static __always_inline
+void _cgroup_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
+                             struct cgroup *cgrp, unsigned long flags,
+                             const bool fast_path)
+{
+       if (fast_path)
+               trace_cgroup_rstat_cpu_unlock_fastpath(cgrp, cpu, false);
+       else
+               trace_cgroup_rstat_cpu_unlock(cgrp, cpu, false);
+
+       raw_spin_unlock_irqrestore(cpu_lock, flags);
+}
+
  /**
   * cgroup_rstat_updated - keep track of updated rstat_cpu
   * @cgrp: target cgroup
@@ -44,7 +98,7 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
         if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
                 return;
  
-       raw_spin_lock_irqsave(cpu_lock, flags);
+       flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, cgrp, true);
  
         /* put @cgrp and all ancestors on the corresponding updated lists */
         while (true) {
@@ -72,7 +126,7 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
                 cgrp = parent;
         }
  
-       raw_spin_unlock_irqrestore(cpu_lock, flags);
+       _cgroup_rstat_cpu_unlock(cpu_lock, cpu, cgrp, flags, true);
  }
  
  /**
@@ -153,15 +207,7 @@ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
         struct cgroup *head = NULL, *parent, *child;
         unsigned long flags;
  
-       /*
-        * The _irqsave() is needed because cgroup_rstat_lock is
-        * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
-        * this lock with the _irq() suffix only disables interrupts on
-        * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
-        * interrupts on both configurations. The _irqsave() ensures
-        * that interrupts are always disabled and later restored.
-        */
-       raw_spin_lock_irqsave(cpu_lock, flags);
+       flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, root, false);
  
         /* Return NULL if this subtree is not on-list */
         if (!rstatc->updated_next)
@@ -198,7 +244,7 @@ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
         if (child != root)
                 head = cgroup_rstat_push_children(head, child, cpu);
  unlock_ret:
-       raw_spin_unlock_irqrestore(cpu_lock, flags);
+       _cgroup_rstat_cpu_unlock(cpu_lock, cpu, root, flags, false);
         return head;
  }
author	Jesper Dangaard Brouer <hawk@kernel.org>
	Wed, 1 May 2024 14:04:11 +0000 (16:04 +0200)
committer	Tejun Heo <tj@kernel.org>
	Tue, 14 May 2024 19:43:17 +0000 (09:43 -1000)
include/trace/events/cgroup.h		patch \| blob \| blame \| history
kernel/cgroup/rstat.c		patch \| blob \| blame \| history