sched/fair: Fair server interface
author     Daniel Bristot de Oliveira <bristot@kernel.org>
           Mon, 27 May 2024 12:06:52 +0000 (14:06 +0200)
committer  Peter Zijlstra <peterz@infradead.org>
           Mon, 29 Jul 2024 10:22:36 +0000 (12:22 +0200)
Add an interface for fair server setup on debugfs.

Each CPU has two files under /debug/sched/fair_server/cpu{ID}:

 - runtime: set runtime in ns
 - period:  set period in ns

This then leaves /proc/sys/kernel/sched_rt_{period,runtime}_us to set
bounds on admission control.
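
As an illustration (not part of this patch), the knobs can be driven from
user space like any other debugfs attribute. The /debug prefix above assumes
debugfs mounted at /debug; the sketch below assumes the more common
/sys/kernel/debug mount point:

    /* Set CPU 0's fair server runtime to 50 ms (the files take ns). */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            const char path[] = "/sys/kernel/debug/sched/fair_server/cpu0/runtime";
            const char val[] = "50000000";
            int fd = open(path, O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (write(fd, val, strlen(val)) != (ssize_t)strlen(val))
                    perror("write");
            close(fd);
            return 0;
    }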

The interface also adds the server to the dl bandwidth accounting.
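
For reference, the bandwidth booked for the server is the runtime/period
ratio in the kernel's BW_SHIFT (= 20) fixed point, as computed by
to_ratio(). A user-space re-derivation of that arithmetic (a sketch of the
same math, not the kernel code itself):

    #include <stdint.h>
    #include <stdio.h>

    #define BW_SHIFT 20 /* the kernel's bandwidth fixed-point shift */

    /* Same math as the kernel's to_ratio(period, runtime). */
    static uint64_t to_ratio(uint64_t period, uint64_t runtime)
    {
            if (period == 0)
                    return 0;
            return (runtime << BW_SHIFT) / period;
    }

    int main(void)
    {
            /* 50 ms every 1 s books ~5% bandwidth: 52428 of 1 << 20. */
            printf("%llu\n",
                   (unsigned long long)to_ratio(1000000000ULL, 50000000ULL));
            return 0;
    }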

Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://lore.kernel.org/r/a9ef9fc69bcedb44bddc9bc34f2b313296052819.1716811044.git.bristot@kernel.org
kernel/sched/deadline.c
kernel/sched/debug.c
kernel/sched/sched.h
kernel/sched/topology.c

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 1b295314bc93fa94c73277c4a3a8085319f98db3..747c0c51d753092385894438c2769e1a05ceb8a1 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -320,19 +320,12 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
                __sub_running_bw(dl_se->dl_bw, dl_rq);
 }
 
-static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+static void dl_rq_change_utilization(struct rq *rq, struct sched_dl_entity *dl_se, u64 new_bw)
 {
-       struct rq *rq;
-
-       WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
-
-       if (task_on_rq_queued(p))
-               return;
+       if (dl_se->dl_non_contending) {
+               sub_running_bw(dl_se, &rq->dl);
+               dl_se->dl_non_contending = 0;
 
-       rq = task_rq(p);
-       if (p->dl.dl_non_contending) {
-               sub_running_bw(&p->dl, &rq->dl);
-               p->dl.dl_non_contending = 0;
                /*
                 * If the timer handler is currently running and the
                 * timer cannot be canceled, inactive_task_timer()
@@ -340,13 +333,25 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
                 * will not touch the rq's active utilization,
                 * so we are still safe.
                 */
-               if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
-                       put_task_struct(p);
+               if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) {
+                       if (!dl_server(dl_se))
+                               put_task_struct(dl_task_of(dl_se));
+               }
        }
-       __sub_rq_bw(p->dl.dl_bw, &rq->dl);
+       __sub_rq_bw(dl_se->dl_bw, &rq->dl);
        __add_rq_bw(new_bw, &rq->dl);
 }
 
+static void dl_change_utilization(struct task_struct *p, u64 new_bw)
+{
+       WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
+
+       if (task_on_rq_queued(p))
+               return;
+
+       dl_rq_change_utilization(task_rq(p), &p->dl, new_bw);
+}
+
 static void __dl_clear_params(struct sched_dl_entity *dl_se);
 
 /*
@@ -1621,11 +1626,17 @@ void dl_server_start(struct sched_dl_entity *dl_se)
 {
        struct rq *rq = dl_se->rq;
 
+       /*
+        * XXX: dl_server_apply_params() does not work correctly during the
+        * init phase for the fair server, because things are not yet set
+        * up. This needs improving before the mechanism is made generic.
+        */
        if (!dl_server(dl_se)) {
                /* Disabled */
-               dl_se->dl_runtime = 0;
-               dl_se->dl_deadline = 1000 * NSEC_PER_MSEC;
-               dl_se->dl_period = 1000 * NSEC_PER_MSEC;
+               u64 runtime = 0;
+               u64 period = 1000 * NSEC_PER_MSEC;
+
+               dl_server_apply_params(dl_se, runtime, period, 1);
 
                dl_se->dl_server = 1;
                dl_se->dl_defer = 1;
@@ -1660,6 +1671,64 @@ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
        dl_se->server_pick = pick;
 }
 
+void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq)
+{
+       u64 new_bw = dl_se->dl_bw;
+       int cpu = cpu_of(rq);
+       struct dl_bw *dl_b;
+
+       dl_b = dl_bw_of(cpu);
+       guard(raw_spinlock)(&dl_b->lock);
+
+       if (!dl_bw_cpus(cpu))
+               return;
+
+       __dl_add(dl_b, new_bw, dl_bw_cpus(cpu));
+}
+
+int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init)
+{
+       u64 old_bw = init ? 0 : to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+       u64 new_bw = to_ratio(period, runtime);
+       struct rq *rq = dl_se->rq;
+       int cpu = cpu_of(rq);
+       struct dl_bw *dl_b;
+       unsigned long cap;
+       int retval = 0;
+       int cpus;
+
+       dl_b = dl_bw_of(cpu);
+       guard(raw_spinlock)(&dl_b->lock);
+
+       cpus = dl_bw_cpus(cpu);
+       cap = dl_bw_capacity(cpu);
+
+       if (__dl_overflow(dl_b, cap, old_bw, new_bw))
+               return -EBUSY;
+
+       if (init) {
+               __add_rq_bw(new_bw, &rq->dl);
+               __dl_add(dl_b, new_bw, cpus);
+       } else {
+               __dl_sub(dl_b, dl_se->dl_bw, cpus);
+               __dl_add(dl_b, new_bw, cpus);
+
+               dl_rq_change_utilization(rq, dl_se, new_bw);
+       }
+
+       dl_se->dl_runtime = runtime;
+       dl_se->dl_deadline = period;
+       dl_se->dl_period = period;
+
+       dl_se->runtime = 0;
+       dl_se->deadline = 0;
+
+       dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+       dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
+
+       return retval;
+}
+
 /*
  * Update the current task's runtime statistics (provided it is still
  * a -deadline task and has not been removed from the dl_rq).
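
An aside on the admission test in dl_server_apply_params() above:
__dl_overflow() fails the request when swapping old_bw for new_bw would push
the already-admitted total past the root domain's capacity-scaled bandwidth
limit (the kernel also special-cases an infinite limit, bw == -1, elided
here). A simplified user-space model of the check, assuming the fixed-point
conventions above:

    #include <stdbool.h>
    #include <stdint.h>

    #define SCHED_CAPACITY_SHIFT 10 /* the kernel's capacity fixed point */

    /*
     * limit_bw: configured dl bandwidth limit; cap: summed capacity of the
     * CPUs in the root domain; total_bw: bandwidth already admitted.
     */
    static bool dl_overflow(uint64_t limit_bw, uint64_t cap,
                            uint64_t total_bw, uint64_t old_bw, uint64_t new_bw)
    {
            return ((limit_bw * cap) >> SCHED_CAPACITY_SHIFT) <
                   total_bw - old_bw + new_bw;
    }
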
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 90c4a9998377c743afc40d623974c1b059461bf4..72f2715dec78241191a77a350fea5ad716897ebb 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -333,8 +333,165 @@ static const struct file_operations sched_debug_fops = {
        .release        = seq_release,
 };
 
+enum dl_param {
+       DL_RUNTIME = 0,
+       DL_PERIOD,
+};
+
+static unsigned long fair_server_period_max = (1 << 22) * NSEC_PER_USEC; /* ~4 seconds */
+static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC;     /* 100 us */
+
+static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubuf,
+                                      size_t cnt, loff_t *ppos, enum dl_param param)
+{
+       long cpu = (long) ((struct seq_file *) filp->private_data)->private;
+       struct rq *rq = cpu_rq(cpu);
+       u64 runtime, period;
+       int err;
+       int retval;
+       u64 value;
+
+       err = kstrtoull_from_user(ubuf, cnt, 10, &value);
+       if (err)
+               return err;
+
+       scoped_guard (rq_lock_irqsave, rq) {
+               runtime  = rq->fair_server.dl_runtime;
+               period = rq->fair_server.dl_period;
+
+               switch (param) {
+               case DL_RUNTIME:
+                       if (runtime == value)
+                               break;
+                       runtime = value;
+                       break;
+               case DL_PERIOD:
+                       if (value == period)
+                               break;
+                       period = value;
+                       break;
+               }
+
+               if (runtime > period ||
+                   period > fair_server_period_max ||
+                   period < fair_server_period_min) {
+                       return -EINVAL;
+               }
+
+               if (rq->cfs.h_nr_running) {
+                       update_rq_clock(rq);
+                       dl_server_stop(&rq->fair_server);
+               }
+
+               retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0);
+               if (retval)
+                       cnt = retval;
+
+               if (!runtime)
+                       printk_deferred("Fair server disabled on CPU %d, system may crash due to starvation.\n",
+                                       cpu_of(rq));
+
+               if (rq->cfs.h_nr_running)
+                       dl_server_start(&rq->fair_server);
+       }
+
+       *ppos += cnt;
+       return cnt;
+}
+
+static int sched_fair_server_show(struct seq_file *m, void *v, enum dl_param param)
+{
+       unsigned long cpu = (unsigned long) m->private;
+       struct rq *rq = cpu_rq(cpu);
+       u64 value;
+
+       switch (param) {
+       case DL_RUNTIME:
+               value = rq->fair_server.dl_runtime;
+               break;
+       case DL_PERIOD:
+               value = rq->fair_server.dl_period;
+               break;
+       }
+
+       seq_printf(m, "%llu\n", value);
+       return 0;
+
+}
+
+static ssize_t
+sched_fair_server_runtime_write(struct file *filp, const char __user *ubuf,
+                               size_t cnt, loff_t *ppos)
+{
+       return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_RUNTIME);
+}
+
+static int sched_fair_server_runtime_show(struct seq_file *m, void *v)
+{
+       return sched_fair_server_show(m, v, DL_RUNTIME);
+}
+
+static int sched_fair_server_runtime_open(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, sched_fair_server_runtime_show, inode->i_private);
+}
+
+static const struct file_operations fair_server_runtime_fops = {
+       .open           = sched_fair_server_runtime_open,
+       .write          = sched_fair_server_runtime_write,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static ssize_t
+sched_fair_server_period_write(struct file *filp, const char __user *ubuf,
+                              size_t cnt, loff_t *ppos)
+{
+       return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_PERIOD);
+}
+
+static int sched_fair_server_period_show(struct seq_file *m, void *v)
+{
+       return sched_fair_server_show(m, v, DL_PERIOD);
+}
+
+static int sched_fair_server_period_open(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, sched_fair_server_period_show, inode->i_private);
+}
+
+static const struct file_operations fair_server_period_fops = {
+       .open           = sched_fair_server_period_open,
+       .write          = sched_fair_server_period_write,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
 static struct dentry *debugfs_sched;
 
+static void debugfs_fair_server_init(void)
+{
+       struct dentry *d_fair;
+       unsigned long cpu;
+
+       d_fair = debugfs_create_dir("fair_server", debugfs_sched);
+       if (IS_ERR(d_fair))
+               return;
+
+       for_each_possible_cpu(cpu) {
+               struct dentry *d_cpu;
+               char buf[32];
+
+               snprintf(buf, sizeof(buf), "cpu%lu", cpu);
+               d_cpu = debugfs_create_dir(buf, d_fair);
+
+               debugfs_create_file("runtime", 0644, d_cpu, (void *) cpu, &fair_server_runtime_fops);
+               debugfs_create_file("period", 0644, d_cpu, (void *) cpu, &fair_server_period_fops);
+       }
+}
+
 static __init int sched_init_debug(void)
 {
        struct dentry __maybe_unused *numa;
@@ -374,6 +531,8 @@ static __init int sched_init_debug(void)
 
        debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
 
+       debugfs_fair_server_init();
+
        return 0;
 }
 late_initcall(sched_init_debug);
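
Note the bounds enforced by the write path above: runtime <= period, and
period within [100 us, (1 << 22) us ~= 4.19 s]. When scripting the interface
it can help to validate values first; a user-space mirror of the kernel
check (a sketch, values in ns):

    #include <stdbool.h>
    #include <stdint.h>

    /* Mirrors the range check in sched_fair_server_write(). */
    static bool fair_server_params_valid(uint64_t runtime, uint64_t period)
    {
            const uint64_t period_min = 100 * 1000ULL;          /* 100 us */
            const uint64_t period_max = (1ULL << 22) * 1000ULL; /* ~4.19 s */

            return runtime <= period &&
                   period >= period_min && period <= period_max;
    }
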
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 64fb6776664eadc427844cad48d98e853c16f08f..b777ac361543ec402c1d66c845ca45814431c7e3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -366,6 +366,9 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 extern void dl_server_update_idle_time(struct rq *rq,
                    struct task_struct *p);
 extern void fair_server_init(struct rq *rq);
+extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq);
+extern int dl_server_apply_params(struct sched_dl_entity *dl_se,
+                   u64 runtime, u64 period, bool init);
 
 #ifdef CONFIG_CGROUP_SCHED
 
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 76504b776d03b8c364872ee921a932c727a5bc56..9748a4c8d66853e5d07775cdfe1d6cc958ff39d7 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -516,6 +516,14 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
        if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
                set_rq_online(rq);
 
+       /*
+        * Because the rq is not a task, dl_add_task_root_domain() did not
+        * move the fair server bw to the rd if the server already started.
+        * Add it now.
+        */
+       if (rq->fair_server.dl_server)
+               __dl_server_attach_root(&rq->fair_server, rq);
+
        rq_unlock_irqrestore(rq, &rf);
 
        if (old_rd)