sched: Check for an idle shared cache in select_task_rq_fair()
author: Mike Galbraith <efault@gmx.de>
Tue, 27 Oct 2009 14:35:38 +0000 (15:35 +0100)
committer: Ingo Molnar <mingo@elte.hu>
Wed, 4 Nov 2009 17:46:22 +0000 (18:46 +0100)
When waking affine, check for an idle shared cache, and if
found, wake to that CPU/sibling instead of the waker's CPU.

This improves pgsql+oltp ramp up by roughly 8%. Possibly more
for other loads, depending on overlap. The trade-off is a
roughly 1% peak downturn if tasks are truly synchronous.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@kernel.org>
LKML-Reference: <1256654138.17752.7.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
kernel/sched_fair.c

index 4e777b47eedac1f5f779c002091e2dc8b696abe1..da87385683ccc6f3de8f6deb57a820ad59c6f9ce 100644 (file)
@@ -1372,11 +1372,36 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
                                want_sd = 0;
                }
 
-               if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
-                   cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+               if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) {
+                       int candidate = -1, i;
 
-                       affine_sd = tmp;
-                       want_affine = 0;
+                       if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
+                               candidate = cpu;
+
+                       /*
+                        * Check for an idle shared cache.
+                        */
+                       if (tmp->flags & SD_PREFER_SIBLING) {
+                               if (candidate == cpu) {
+                                       if (!cpu_rq(prev_cpu)->cfs.nr_running)
+                                               candidate = prev_cpu;
+                               }
+
+                               if (candidate == -1 || candidate == cpu) {
+                                       for_each_cpu(i, sched_domain_span(tmp)) {
+                                               if (!cpu_rq(i)->cfs.nr_running) {
+                                                       candidate = i;
+                                                       break;
+                                               }
+                                       }
+                               }
+                       }
+
+                       if (candidate >= 0) {
+                               affine_sd = tmp;
+                               want_affine = 0;
+                               cpu = candidate;
+                       }
                }
 
                if (!want_sd && !want_affine)