sched/fair: Unify pick_{,next_}task_fair()
author Peter Zijlstra <peterz@infradead.org>
Wed, 3 Apr 2024 07:50:16 +0000 (09:50 +0200)
committer Peter Zijlstra <peterz@infradead.org>
Sat, 17 Aug 2024 09:06:41 +0000 (11:06 +0200)
Implement pick_next_task_fair() in terms of pick_task_fair() to
de-duplicate the pick loop.

More importantly, this makes all the pick loops use the
state-invariant form, which is useful to introduce further re-try
conditions in later patches.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lkml.kernel.org/r/20240727105028.725062368@infradead.org
kernel/sched/fair.c

index 175ccec8c53f8ad97dcf5c801656696c80ed5ab6..1452c536b578079662a986748529b094f38c8a66 100644 (file)
@@ -8451,7 +8451,6 @@ preempt:
        resched_curr(rq);
 }
 
-#ifdef CONFIG_SMP
 static struct task_struct *pick_task_fair(struct rq *rq)
 {
        struct sched_entity *se;
@@ -8463,7 +8462,7 @@ again:
                return NULL;
 
        do {
-               /* When we pick for a remote RQ, we'll not have done put_prev_entity() */
+               /* Might not have done put_prev_entity() */
                if (cfs_rq->curr && cfs_rq->curr->on_rq)
                        update_curr(cfs_rq);
 
@@ -8484,19 +8483,19 @@ again:
 
        return task_of(se);
 }
-#endif
 
 struct task_struct *
 pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
-       struct cfs_rq *cfs_rq = &rq->cfs;
        struct sched_entity *se;
        struct task_struct *p;
        int new_tasks;
 
 again:
-       if (!sched_fair_runnable(rq))
+       p = pick_task_fair(rq);
+       if (!p)
                goto idle;
+       se = &p->se;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
        if (!prev || prev->sched_class != &fair_sched_class)
@@ -8508,52 +8507,14 @@ again:
         *
         * Therefore attempt to avoid putting and setting the entire cgroup
         * hierarchy, only change the part that actually changes.
-        */
-
-       do {
-               struct sched_entity *curr = cfs_rq->curr;
-
-               /*
-                * Since we got here without doing put_prev_entity() we also
-                * have to consider cfs_rq->curr. If it is still a runnable
-                * entity, update_curr() will update its vruntime, otherwise
-                * forget we've ever seen it.
-                */
-               if (curr) {
-                       if (curr->on_rq)
-                               update_curr(cfs_rq);
-                       else
-                               curr = NULL;
-
-                       /*
-                        * This call to check_cfs_rq_runtime() will do the
-                        * throttle and dequeue its entity in the parent(s).
-                        * Therefore the nr_running test will indeed
-                        * be correct.
-                        */
-                       if (unlikely(check_cfs_rq_runtime(cfs_rq))) {
-                               cfs_rq = &rq->cfs;
-
-                               if (!cfs_rq->nr_running)
-                                       goto idle;
-
-                               goto simple;
-                       }
-               }
-
-               se = pick_next_entity(cfs_rq);
-               cfs_rq = group_cfs_rq(se);
-       } while (cfs_rq);
-
-       p = task_of(se);
-
-       /*
+        *
         * Since we haven't yet done put_prev_entity and if the selected task
         * is a different task than we started out with, try and touch the
         * least amount of cfs_rqs.
         */
        if (prev != p) {
                struct sched_entity *pse = &prev->se;
+               struct cfs_rq *cfs_rq;
 
                while (!(cfs_rq = is_same_group(se, pse))) {
                        int se_depth = se->depth;
@@ -8579,13 +8540,8 @@ simple:
        if (prev)
                put_prev_task(rq, prev);
 
-       do {
-               se = pick_next_entity(cfs_rq);
-               set_next_entity(cfs_rq, se);
-               cfs_rq = group_cfs_rq(se);
-       } while (cfs_rq);
-
-       p = task_of(se);
+       for_each_sched_entity(se)
+               set_next_entity(cfs_rq_of(se), se);
 
 done: __maybe_unused;
 #ifdef CONFIG_SMP