sched/fair: Sanitize vruntime of entity being migrated

author Vincent Guittot <vincent.guittot@linaro.org>

Fri, 17 Mar 2023 16:08:10 +0000 (17:08 +0100)

committer Peter Zijlstra <peterz@infradead.org>

Tue, 21 Mar 2023 13:43:04 +0000 (14:43 +0100)
author Vincent Guittot <vincent.guittot@linaro.org>
Fri, 17 Mar 2023 16:08:10 +0000 (17:08 +0100)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 21 Mar 2023 13:43:04 +0000 (14:43 +0100)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 488655f2319f5d7660be0772e6aafb1fc1a0e80a..0d18c3969f90400e5c91e1e0132268dcff5feb65 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2084,6 +2084,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
  
  void activate_task(struct rq *rq, struct task_struct *p, int flags)
  {
+       if (task_on_rq_migrating(p))
+               flags |= ENQUEUE_MIGRATED;
+
         enqueue_task(rq, p, flags);
  
         p->on_rq = TASK_ON_RQ_QUEUED;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 7a1b1f855b9635e75282913850b70ffba2006322..6986ea31c9844719cf083ee1b60e3163add9c9db 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4648,11 +4648,33 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
  #endif
  }
  
+static inline bool entity_is_long_sleeper(struct sched_entity *se)
+{
+       struct cfs_rq *cfs_rq;
+       u64 sleep_time;
+       /* True when se slept longer than the cutoff computed at the end. */
+       if (se->exec_start == 0)
+               return false;   /* no start timestamp to measure from */
+
+       cfs_rq = cfs_rq_of(se);
+       /* Holds the rq's current task clock first, then the elapsed time. */
+       sleep_time = rq_clock_task(rq_of(cfs_rq));
+
+       /* Can happen while migrating because of task clock divergence */
+       if (sleep_time <= se->exec_start)
+               return false;   /* also keeps the subtraction below from wrapping */
+       /* Time elapsed since exec_start, as measured on this rq's task clock. */
+       sleep_time -= se->exec_start;
+       if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
+               return true;
+
+       return false;
+}
+
  static void
  place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
  {
         u64 vruntime = cfs_rq->min_vruntime;
-       u64 sleep_time;
  
         /*
          * The 'current' period is already promised to the current tasks,
@@ -4684,13 +4706,24 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
  
         /*
          * Pull vruntime of the entity being placed to the base level of
-        * cfs_rq, to prevent boosting it if placed backwards.  If the entity
-        * slept for a long time, don't even try to compare its vruntime with
-        * the base as it may be too far off and the comparison may get
-        * inversed due to s64 overflow.
-        */
-       sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start;
-       if ((s64)sleep_time > 60LL * NSEC_PER_SEC)
+        * cfs_rq, to prevent boosting it if placed backwards.
+        * However, min_vruntime can advance much faster than real time, with
+        * the extreme being when an entity with the minimal weight always runs
+        * on the cfs_rq. If the waking entity slept for a long time, its
+        * vruntime difference from min_vruntime may overflow s64 and their
+        * comparison may get inversed, so ignore the entity's original
+        * vruntime in that case.
+        * The maximal vruntime speedup is given by the ratio of normal to
+        * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
+        * When placing a migrated waking entity, its exec_start has been set
+        * from a different rq. In order to take into account a possible
+        * divergence between new and prev rq's clocks task because of irq and
+        * stolen time, we take an additional margin.
+        * So, cutting off on the sleep time of
+        *     2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
+        * should be safe.
+        */
+       if (entity_is_long_sleeper(se))
                 se->vruntime = vruntime;
         else
                 se->vruntime = max_vruntime(se->vruntime, vruntime);
@@ -4770,6 +4803,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  
         if (flags & ENQUEUE_WAKEUP)
                 place_entity(cfs_rq, se, 0);
+       /* Entity has migrated, no longer consider this task hot */
+       if (flags & ENQUEUE_MIGRATED)
+               se->exec_start = 0;
  
         check_schedstat_required();
         update_stats_enqueue_fair(cfs_rq, se, flags);
@@ -7657,9 +7693,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
         /* Tell new CPU we are migrated */
         se->avg.last_update_time = 0;
  
-       /* We have migrated, no longer consider this task hot */
-       se->exec_start = 0;
-
         update_scan_period(p, new_cpu);
  }
author	Vincent Guittot <vincent.guittot@linaro.org>
	Fri, 17 Mar 2023 16:08:10 +0000 (17:08 +0100)
committer	Peter Zijlstra <peterz@infradead.org>
	Tue, 21 Mar 2023 13:43:04 +0000 (14:43 +0100)
kernel/sched/core.c		patch \| blob \| blame \| history
kernel/sched/fair.c		patch \| blob \| blame \| history