sched_ext: Add an event, SCX_EV_ENQ_SLICE_DFL
author Changwoo Min <changwoo@igalia.com>
Fri, 7 Feb 2025 06:40:51 +0000 (15:40 +0900)
committer Tejun Heo <tj@kernel.org>
Fri, 7 Feb 2025 21:24:59 +0000 (11:24 -1000)
Add a core event, SCX_EV_ENQ_SLICE_DFL, which counts how many times a
task has been enqueued (or pick_task-ed or select_cpu-ed) with the
default time slice (SCX_SLICE_DFL).

Unintentionally scheduling a task with SCX_SLICE_DFL can be a source
of latency spikes because SCX_SLICE_DFL is relatively long (20 msec).
Thus, a soaring SCX_EV_ENQ_SLICE_DFL value would be a sign of BPF
scheduler bugs that cause latency spikes, especially when
ops.select_cpu() is provided.

__scx_add_event() is used since the caller holds an rq lock or p->pi_lock,
so preemption is already disabled.

Signed-off-by: Changwoo Min <changwoo@igalia.com>
Acked-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/sched/ext.c

index 8a9a30895381ac8536185a2d8242f88d78c0e79b..5ef90d9bcdd2dad524b69a2d64e3e9a186d80403 100644 (file)
@@ -1468,6 +1468,12 @@ struct scx_event_stats {
         */
        u64             SCX_EV_ENQ_SKIP_EXITING;
 
+       /*
+        * The total number of tasks enqueued (or pick_task-ed) with a
+        * default time slice (SCX_SLICE_DFL).
+        */
+       u64             SCX_EV_ENQ_SLICE_DFL;
+
        /*
         * The total duration of bypass modes in nanoseconds.
         */
@@ -2134,6 +2140,7 @@ local:
         */
        touch_core_sched(rq, p);
        p->scx.slice = SCX_SLICE_DFL;
+       __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
 local_norefill:
        dispatch_enqueue(&rq->scx.local_dsq, p, enq_flags);
        return;
@@ -2141,6 +2148,7 @@ local_norefill:
 global:
        touch_core_sched(rq, p);        /* see the comment in local: */
        p->scx.slice = SCX_SLICE_DFL;
+       __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
        dispatch_enqueue(find_global_dsq(p), p, enq_flags);
 }
 
@@ -3202,8 +3210,10 @@ static struct task_struct *pick_task_scx(struct rq *rq)
         */
        if (keep_prev) {
                p = prev;
-               if (!p->scx.slice)
+               if (!p->scx.slice) {
                        p->scx.slice = SCX_SLICE_DFL;
+                       __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
+               }
        } else {
                p = first_local_task(rq);
                if (!p) {
@@ -3219,6 +3229,7 @@ static struct task_struct *pick_task_scx(struct rq *rq)
                                scx_warned_zero_slice = true;
                        }
                        p->scx.slice = SCX_SLICE_DFL;
+                       __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
                }
        }
 
@@ -3306,6 +3317,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
                if (found) {
                        p->scx.slice = SCX_SLICE_DFL;
                        p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL;
+                       __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
                }
 
                if (rq_bypass)
@@ -5023,6 +5035,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
        scx_dump_event(s, &events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
        scx_dump_event(s, &events, SCX_EV_DISPATCH_KEEP_LAST);
        scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_EXITING);
+       scx_dump_event(s, &events, SCX_EV_ENQ_SLICE_DFL);
        scx_dump_event(s, &events, SCX_EV_BYPASS_DURATION);
        scx_dump_event(s, &events, SCX_EV_BYPASS_DISPATCH);
        scx_dump_event(s, &events, SCX_EV_BYPASS_ACTIVATE);
@@ -7163,6 +7176,7 @@ __bpf_kfunc void scx_bpf_events(struct scx_event_stats *events,
                scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
                scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_KEEP_LAST);
                scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SKIP_EXITING);
+               scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SLICE_DFL);
                scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DURATION);
                scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DISPATCH);
                scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_ACTIVATE);