sched/fair: Use the new cfs_rq.h_nr_runnable
author     Vincent Guittot <vincent.guittot@linaro.org>
           Mon, 2 Dec 2024 17:46:00 +0000 (18:46 +0100)
committer  Peter Zijlstra <peterz@infradead.org>
           Mon, 9 Dec 2024 10:48:11 +0000 (11:48 +0100)
Use the new h_nr_runnable, which tracks only queued and runnable tasks, in
the statistics that are used to balance the system:

 - PELT runnable_avg
 - deciding if a group is overloaded or has spare capacity
 - numa stats
 - reduced capacity management
 - load balance
 - nohz kick

Note that rq->nr_running still counts delayed-dequeue tasks, since delayed
dequeue is a fair-class feature that is meaningless at the core scheduler
level.
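
As a rough illustration (not part of the patch itself) of the counter
relationship the conversion relies on, suggested by the pelt.c and sched.h
hunks below where "h_nr_queued - h_nr_delayed" becomes "h_nr_runnable",
here is a hypothetical userspace sketch; the struct and helper names are
stand-ins, not kernel code:

  /*
   * Sketch of the assumed invariant: a task that is hierarchically queued
   * on a cfs_rq is either runnable or in the delayed-dequeue state, so
   * h_nr_runnable == h_nr_queued - h_nr_delayed.
   */
  #include <assert.h>

  struct cfs_rq_counters {              /* stand-in for struct cfs_rq     */
          unsigned int h_nr_queued;     /* all hierarchically queued tasks */
          unsigned int h_nr_delayed;    /* of those, delayed-dequeue tasks */
          unsigned int h_nr_runnable;   /* of those, queued and runnable   */
  };

  /* What the balance statistics now read directly instead of computing. */
  static unsigned int nr_runnable_for_balance(const struct cfs_rq_counters *cfs_rq)
  {
          assert(cfs_rq->h_nr_runnable ==
                 cfs_rq->h_nr_queued - cfs_rq->h_nr_delayed);
          return cfs_rq->h_nr_runnable;
  }

  int main(void)
  {
          struct cfs_rq_counters rq = { .h_nr_queued = 3, .h_nr_delayed = 1,
                                        .h_nr_runnable = 2 };
          return nr_runnable_for_balance(&rq) == 2 ? 0 : 1;
  }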

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/20241202174606.4074512-6-vincent.guittot@linaro.org
kernel/sched/fair.c
kernel/sched/pelt.c
kernel/sched/sched.h

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ed01e72b2b7726db2f194b275666046368542db5..3a8bdfbf486721cbf417d5d71296c54028b6777e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2128,7 +2128,7 @@ static void update_numa_stats(struct task_numa_env *env,
                ns->load += cpu_load(rq);
                ns->runnable += cpu_runnable(rq);
                ns->util += cpu_util_cfs(cpu);
-               ns->nr_running += rq->cfs.h_nr_queued;
+               ns->nr_running += rq->cfs.h_nr_runnable;
                ns->compute_capacity += capacity_of(cpu);
 
                if (find_idle && idle_core < 0 && !rq->nr_running && idle_cpu(cpu)) {
@@ -5394,7 +5394,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         * When enqueuing a sched_entity, we must:
         *   - Update loads to have both entity and cfs_rq synced with now.
         *   - For group_entity, update its runnable_weight to reflect the new
-        *     h_nr_queued of its group cfs_rq.
+        *     h_nr_runnable of its group cfs_rq.
         *   - For group_entity, update its weight to reflect the new share of
         *     its group cfs_rq
         *   - Add its new weight to cfs_rq->load.weight
@@ -5533,7 +5533,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         * When dequeuing a sched_entity, we must:
         *   - Update loads to have both entity and cfs_rq synced with now.
         *   - For group_entity, update its runnable_weight to reflect the new
-        *     h_nr_queued of its group cfs_rq.
+        *     h_nr_runnable of its group cfs_rq.
         *   - Subtract its previous weight from cfs_rq->load.weight.
         *   - For group entity, update its weight to reflect the new share
         *     of its group cfs_rq.
@@ -10332,7 +10332,7 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
         * When there is more than 1 task, the group_overloaded case already
         * takes care of cpu with reduced capacity
         */
-       if (rq->cfs.h_nr_queued != 1)
+       if (rq->cfs.h_nr_runnable != 1)
                return false;
 
        return check_cpu_capacity(rq, sd);
@@ -10367,7 +10367,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                sgs->group_load += load;
                sgs->group_util += cpu_util_cfs(i);
                sgs->group_runnable += cpu_runnable(rq);
-               sgs->sum_h_nr_running += rq->cfs.h_nr_queued;
+               sgs->sum_h_nr_running += rq->cfs.h_nr_runnable;
 
                nr_running = rq->nr_running;
                sgs->sum_nr_running += nr_running;
@@ -10682,7 +10682,7 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
                sgs->group_util += cpu_util_without(i, p);
                sgs->group_runnable += cpu_runnable_without(rq, p);
                local = task_running_on_cpu(i, p);
-               sgs->sum_h_nr_running += rq->cfs.h_nr_queued - local;
+               sgs->sum_h_nr_running += rq->cfs.h_nr_runnable - local;
 
                nr_running = rq->nr_running - local;
                sgs->sum_nr_running += nr_running;
@@ -11464,7 +11464,7 @@ static struct rq *sched_balance_find_src_rq(struct lb_env *env,
                if (rt > env->fbq_type)
                        continue;
 
-               nr_running = rq->cfs.h_nr_queued;
+               nr_running = rq->cfs.h_nr_runnable;
                if (!nr_running)
                        continue;
 
@@ -11623,7 +11623,7 @@ static int need_active_balance(struct lb_env *env)
         * available on dst_cpu.
         */
        if (env->idle &&
-           (env->src_rq->cfs.h_nr_queued == 1)) {
+           (env->src_rq->cfs.h_nr_runnable == 1)) {
                if ((check_cpu_capacity(env->src_rq, sd)) &&
                    (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
                        return 1;
@@ -12364,7 +12364,7 @@ static void nohz_balancer_kick(struct rq *rq)
                 * If there's a runnable CFS task and the current CPU has reduced
                 * capacity, kick the ILB to see if there's a better CPU to run on:
                 */
-               if (rq->cfs.h_nr_queued >= 1 && check_cpu_capacity(rq, sd)) {
+               if (rq->cfs.h_nr_runnable >= 1 && check_cpu_capacity(rq, sd)) {
                        flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
                        goto unlock;
                }
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index 2bad0b508dfc671085d33720d21aecf4b3287e4e..7a8534a2deffdba785fe734199be6a920bf52c2f 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -275,7 +275,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load)
  *
  *   group: [ see update_cfs_group() ]
  *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
- *     se_runnable() = grq->h_nr_queued
+ *     se_runnable() = grq->h_nr_runnable
  *
  *   runnable_sum = se_runnable() * runnable = grq->runnable_sum
  *   runnable_avg = runnable_sum
@@ -321,7 +321,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
 {
        if (___update_load_sum(now, &cfs_rq->avg,
                                scale_load_down(cfs_rq->load.weight),
-                               cfs_rq->h_nr_queued - cfs_rq->h_nr_delayed,
+                               cfs_rq->h_nr_runnable,
                                cfs_rq->curr != NULL)) {
 
                ___update_load_avg(&cfs_rq->avg, 1);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 869d5d3521f20c3c150885b0bd861dfc23abb770..4374c660f5c774edad2e67f7535a22391661231b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -900,11 +900,8 @@ struct dl_rq {
 
 static inline void se_update_runnable(struct sched_entity *se)
 {
-       if (!entity_is_task(se)) {
-               struct cfs_rq *cfs_rq = se->my_q;
-
-               se->runnable_weight = cfs_rq->h_nr_queued - cfs_rq->h_nr_delayed;
-       }
+       if (!entity_is_task(se))
+               se->runnable_weight = se->my_q->h_nr_runnable;
 }
 
 static inline long se_runnable(struct sched_entity *se)