Merge branch 'sched/warnings' into sched/core, to pick up WARN_ON_ONCE() conversion...

[linux-2.6-block.git] / kernel / sched / fair.c
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 28f10dccd1944806300c34182f8f4cbdcf32c65f..efceb670e755b63b5d5ec97f3ff999f621d767e8 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -799,8 +799,6 @@ void init_entity_runnable_average(struct sched_entity *se)
         /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
  }
  
-static void attach_entity_cfs_rq(struct sched_entity *se);
-
  /*
   * With new tasks being created, their initial util_avgs are extrapolated
   * based on the cfs_rq's current util_avg:
@@ -835,20 +833,6 @@ void post_init_entity_util_avg(struct task_struct *p)
         long cpu_scale = arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq)));
         long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2;
  
-       if (cap > 0) {
-               if (cfs_rq->avg.util_avg != 0) {
-                       sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
-                       sa->util_avg /= (cfs_rq->avg.load_avg + 1);
-
-                       if (sa->util_avg > cap)
-                               sa->util_avg = cap;
-               } else {
-                       sa->util_avg = cap;
-               }
-       }
-
-       sa->runnable_avg = sa->util_avg;
-
         if (p->sched_class != &fair_sched_class) {
                 /*
                  * For !fair tasks do:
@@ -864,7 +848,19 @@ void post_init_entity_util_avg(struct task_struct *p)
                 return;
         }
  
-       attach_entity_cfs_rq(se);
+       if (cap > 0) {
+               if (cfs_rq->avg.util_avg != 0) {
+                       sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
+                       sa->util_avg /= (cfs_rq->avg.load_avg + 1);
+
+                       if (sa->util_avg > cap)
+                               sa->util_avg = cap;
+               } else {
+                       sa->util_avg = cap;
+               }
+       }
+
+       sa->runnable_avg = sa->util_avg;
  }
  
  #else /* !CONFIG_SMP */
@@ -3838,8 +3834,7 @@ static void migrate_se_pelt_lag(struct sched_entity *se) {}
   * @cfs_rq: cfs_rq to update
   *
   * The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
- * avg. The immediate corollary is that all (fair) tasks must be attached, see
- * post_init_entity_util_avg().
+ * avg. The immediate corollary is that all (fair) tasks must be attached.
   *
   * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
   *
@@ -4003,6 +3998,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
  #define UPDATE_TG      0x1
  #define SKIP_AGE_LOAD  0x2
  #define DO_ATTACH      0x4
+#define DO_DETACH      0x8
  
  /* Update task and its cfs_rq load average */
  static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -4032,6 +4028,13 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
                 attach_entity_load_avg(cfs_rq, se);
                 update_tg_load_avg(cfs_rq);
  
+       } else if (flags & DO_DETACH) {
+               /*
+                * DO_DETACH means we're here from dequeue_entity()
+                * and we are migrating the task off this CPU.
+                */
+               detach_entity_load_avg(cfs_rq, se);
+               update_tg_load_avg(cfs_rq);
         } else if (decayed) {
                 cfs_rq_util_change(cfs_rq, 0);
  
@@ -4064,8 +4067,8 @@ static void remove_entity_load_avg(struct sched_entity *se)
  
         /*
          * tasks cannot exit without having gone through wake_up_new_task() ->
-        * post_init_entity_util_avg() which will have added things to the
-        * cfs_rq, so we can remove unconditionally.
+        * enqueue_task_fair() which will have added things to the cfs_rq,
+        * so we can remove unconditionally.
          */
  
         sync_entity_load_avg(se);
@@ -4262,7 +4265,7 @@ static inline int task_fits_capacity(struct task_struct *p,
  
  static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
  {
-       if (!static_branch_unlikely(&sched_asym_cpucapacity))
+       if (!sched_asym_cpucap_active())
                 return;
  
         if (!p || p->nr_cpus_allowed == 1) {
@@ -4292,6 +4295,7 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
  #define UPDATE_TG      0x0
  #define SKIP_AGE_LOAD  0x0
  #define DO_ATTACH      0x0
+#define DO_DETACH      0x0
  
  static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int not_used1)
  {
@@ -4434,7 +4438,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         /*
          * When enqueuing a sched_entity, we must:
          *   - Update loads to have both entity and cfs_rq synced with now.
-        *   - Add its load to cfs_rq->runnable_avg
+        *   - For group_entity, update its runnable_weight to reflect the new
+        *     h_nr_running of its group cfs_rq.
          *   - For group_entity, update its weight to reflect the new share of
          *     its group cfs_rq
          *   - Add its new weight to cfs_rq->load.weight
@@ -4511,6 +4516,11 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
  static void
  dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
+       int action = UPDATE_TG;
+
+       if (entity_is_task(se) && task_on_rq_migrating(task_of(se)))
+               action |= DO_DETACH;
+
         /*
          * Update run-time statistics of the 'current'.
          */
@@ -4519,12 +4529,13 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         /*
          * When dequeuing a sched_entity, we must:
          *   - Update loads to have both entity and cfs_rq synced with now.
-        *   - Subtract its load from the cfs_rq->runnable_avg.
+        *   - For group_entity, update its runnable_weight to reflect the new
+        *     h_nr_running of its group cfs_rq.
          *   - Subtract its previous weight from cfs_rq->load.weight.
          *   - For group entity, update its weight to reflect the new share
          *     of its group cfs_rq.
          */
-       update_load_avg(cfs_rq, se, UPDATE_TG);
+       update_load_avg(cfs_rq, se, action);
         se_update_runnable(se);
  
         update_stats_dequeue_fair(cfs_rq, se, flags);
@@ -5893,8 +5904,8 @@ dequeue_throttle:
  #ifdef CONFIG_SMP
  
  /* Working cpumask for: load_balance, load_balance_newidle. */
-DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
-DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
+static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
  
  #ifdef CONFIG_NO_HZ_COMMON
  
@@ -6506,7 +6517,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
  
  static inline bool asym_fits_capacity(unsigned long task_util, int cpu)
  {
-       if (static_branch_unlikely(&sched_asym_cpucapacity))
+       if (sched_asym_cpucap_active())
                 return fits_capacity(task_util, capacity_of(cpu));
  
         return true;
@@ -6526,7 +6537,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
          * On asymmetric system, update task utilization because we will check
          * that the task fits with cpu's capacity.
          */
-       if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+       if (sched_asym_cpucap_active()) {
                 sync_entity_load_avg(&p->se);
                 task_util = uclamp_task_util(p);
         }
@@ -6580,7 +6591,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
          * For asymmetric CPU capacity systems, our domain of interest is
          * sd_asym_cpucapacity rather than sd_llc.
          */
-       if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+       if (sched_asym_cpucap_active()) {
                 sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
                 /*
                  * On an asymmetric CPU capacity system where an exclusive
@@ -7076,8 +7087,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
         return new_cpu;
  }
  
-static void detach_entity_cfs_rq(struct sched_entity *se);
-
  /*
   * Called immediately before a task is migrated to a new CPU; task_cpu(p) and
   * cfs_rq_of(p) references at time of call are still valid and identify the
@@ -7099,15 +7108,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
                 se->vruntime -= u64_u32_load(cfs_rq->min_vruntime);
         }
  
-       if (p->on_rq == TASK_ON_RQ_MIGRATING) {
-               /*
-                * In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
-                * rq->lock and can modify state directly.
-                */
-               lockdep_assert_rq_held(task_rq(p));
-               detach_entity_cfs_rq(se);
-
-       } else {
+       if (!task_on_rq_migrating(p)) {
                 remove_entity_load_avg(se);
  
                 /*
@@ -10916,8 +10917,7 @@ static bool update_nohz_stats(struct rq *rq)
   * can be a simple update of blocked load or a complete load balance with
   * tasks movement depending of flags.
   */
-static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
-                              enum cpu_idle_type idle)
+static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags)
  {
         /* Earliest time when we have to do rebalance again */
         unsigned long now = jiffies;
@@ -11032,7 +11032,7 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
         if (idle != CPU_IDLE)
                 return false;
  
-       _nohz_idle_balance(this_rq, flags, idle);
+       _nohz_idle_balance(this_rq, flags);
  
         return true;
  }
@@ -11052,7 +11052,7 @@ void nohz_run_idle_balance(int cpu)
          * (ie NOHZ_STATS_KICK set) and will do the same.
          */
         if ((flags == NOHZ_NEWILB_KICK) && !need_resched())
-               _nohz_idle_balance(cpu_rq(cpu), NOHZ_STATS_KICK, CPU_IDLE);
+               _nohz_idle_balance(cpu_rq(cpu), NOHZ_STATS_KICK);
  }
  
  static void nohz_newidle_balance(struct rq *this_rq)
@@ -11552,6 +11552,17 @@ static void detach_entity_cfs_rq(struct sched_entity *se)
  {
         struct cfs_rq *cfs_rq = cfs_rq_of(se);
  
+#ifdef CONFIG_SMP
+       /*
+        * In case the task sched_avg hasn't been attached:
+        * - A forked task which hasn't been woken up by wake_up_new_task().
+        * - A task which has been woken up by try_to_wake_up() but is
+        *   still waiting to actually be woken up by sched_ttwu_pending().
+        */
+       if (!se->avg.last_update_time)
+               return;
+#endif
+
         /* Catch up with the cfs_rq and remove our load when we leave */
         update_load_avg(cfs_rq, se, 0);
         detach_entity_load_avg(cfs_rq, se);
@@ -11563,14 +11574,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
  {
         struct cfs_rq *cfs_rq = cfs_rq_of(se);
  
-#ifdef CONFIG_FAIR_GROUP_SCHED
-       /*
-        * Since the real-depth could have been changed (only FAIR
-        * class maintain depth value), reset depth properly.
-        */
-       se->depth = se->parent ? se->parent->depth + 1 : 0;
-#endif
-
         /* Synchronize entity with its cfs_rq */
         update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
         attach_entity_load_avg(cfs_rq, se);
@@ -11666,39 +11669,25 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
  }
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
-static void task_set_group_fair(struct task_struct *p)
+static void task_change_group_fair(struct task_struct *p)
  {
-       struct sched_entity *se = &p->se;
-
-       set_task_rq(p, task_cpu(p));
-       se->depth = se->parent ? se->parent->depth + 1 : 0;
-}
+       /*
+        * We can't detach or attach a forked task that
+        * hasn't been woken up by wake_up_new_task() yet.
+        */
+       if (READ_ONCE(p->__state) == TASK_NEW)
+               return;
  
-static void task_move_group_fair(struct task_struct *p)
-{
         detach_task_cfs_rq(p);
-       set_task_rq(p, task_cpu(p));
  
  #ifdef CONFIG_SMP
         /* Tell se's cfs_rq has been changed -- migrated */
         p->se.avg.last_update_time = 0;
  #endif
+       set_task_rq(p, task_cpu(p));
         attach_task_cfs_rq(p);
  }
  
-static void task_change_group_fair(struct task_struct *p, int type)
-{
-       switch (type) {
-       case TASK_SET_GROUP:
-               task_set_group_fair(p);
-               break;
-
-       case TASK_MOVE_GROUP:
-               task_move_group_fair(p);
-               break;
-       }
-}
-
  void free_fair_sched_group(struct task_group *tg)
  {
         int i;
@@ -12075,6 +12064,13 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
  __init void init_sched_fair_class(void)
  {
  #ifdef CONFIG_SMP
+       int i;
+
+       for_each_possible_cpu(i) {
+               zalloc_cpumask_var_node(&per_cpu(load_balance_mask, i), GFP_KERNEL, cpu_to_node(i));
+               zalloc_cpumask_var_node(&per_cpu(select_rq_mask,    i), GFP_KERNEL, cpu_to_node(i));
+       }
+
         open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
  
  #ifdef CONFIG_NO_HZ_COMMON