Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 25 Jul 2016 20:59:34 +0000 (13:59 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 25 Jul 2016 20:59:34 +0000 (13:59 -0700)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c

index eea2a6f72b31c089d1b100eaefff32d1c6be4a87..1ef5e48b3a3638504ad42d1b1e7e75c959f06768 100644 (file)
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -301,8 +301,6 @@ static void kvm_register_steal_time(void)
         if (!has_steal_clock)
                 return;
  
-       memset(st, 0, sizeof(*st));
-
         wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
         pr_info("kvm-stealtime: cpu %d, msr %llx\n",
                 cpu, (unsigned long long) slow_virt_to_phys(st));
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 253538f29ade890dcd48d1708fcd15ae362653be..d99218a1e04370683dd239de59c38e48a04d2eb7 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -219,9 +219,10 @@ extern void proc_sched_set_task(struct task_struct *p);
  #define TASK_WAKING            256
  #define TASK_PARKED            512
  #define TASK_NOLOAD            1024
-#define TASK_STATE_MAX         2048
+#define TASK_NEW               2048
+#define TASK_STATE_MAX         4096
  
-#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
+#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"
  
  extern char ___assert_task_state[1 - 2*!!(
                 sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
@@ -2139,6 +2140,9 @@ static inline void put_task_struct(struct task_struct *t)
                 __put_task_struct(t);
  }
  
+struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+struct task_struct *try_get_task_struct(struct task_struct **ptask);
+
  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  extern void task_cputime(struct task_struct *t,
                          cputime_t *utime, cputime_t *stime);
diff --git a/kernel/exit.c b/kernel/exit.c

index 0b40791b9e70259b50458b4c67e1d3325a7a4b6b..84ae830234f8fea6328690ebf977a7063aa91097 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -210,6 +210,82 @@ repeat:
                 goto repeat;
  }
  
+/*
+ * Note that if this function returns a valid task_struct pointer (!NULL)
+ * task->usage must remain >0 for the duration of the RCU critical section.
+ */
+struct task_struct *task_rcu_dereference(struct task_struct **ptask)
+{
+       struct sighand_struct *sighand;
+       struct task_struct *task;
+
+       /*
+        * We need to verify that release_task() was not called and thus
+        * delayed_put_task_struct() can't run and drop the last reference
+        * before rcu_read_unlock(). We check task->sighand != NULL,
+        * but we can read the already freed and reused memory.
+        */
+retry:
+       task = rcu_dereference(*ptask);
+       if (!task)
+               return NULL;
+
+       probe_kernel_address(&task->sighand, sighand);
+
+       /*
+        * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
+        * was already freed we can not miss the preceding update of this
+        * pointer.
+        */
+       smp_rmb();
+       if (unlikely(task != READ_ONCE(*ptask)))
+               goto retry;
+
+       /*
+        * We've re-checked that "task == *ptask", now we have two different
+        * cases:
+        *
+        * 1. This is actually the same task/task_struct. In this case
+        *    sighand != NULL tells us it is still alive.
+        *
+        * 2. This is another task which got the same memory for task_struct.
+        *    We can't know this of course, and we can not trust
+        *    sighand != NULL.
+        *
+        *    In this case we actually return a random value, but this is
+        *    correct.
+        *
+        *    If we return NULL - we can pretend that we actually noticed that
+        *    *ptask was updated when the previous task has exited. Or pretend
+        *    that probe_kernel_address(&task->sighand) reads NULL.
+        *
+        *    If we return the new task (because sighand is not NULL for any
+        *    reason) - this is fine too. This (new) task can't go away before
+        *    another gp pass.
+        *
+        *    And note: We could even eliminate the false positive if we re-read
+        *    task->sighand once again to avoid a falsely NULL result. But this
+        *    case is very unlikely so we don't care.
+        */
+       if (!sighand)
+               return NULL;
+
+       return task;
+}
+
+struct task_struct *try_get_task_struct(struct task_struct **ptask)
+{
+       struct task_struct *task;
+
+       rcu_read_lock();
+       task = task_rcu_dereference(ptask);
+       if (task)
+               get_task_struct(task);
+       rcu_read_unlock();
+
+       return task;
+}
+
  /*
   * Determine if a process group is "orphaned", according to the POSIX
   * definition in 2.2.2.52.  Orphaned process groups are not to be affected
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index af0ef74df23c657563ba567fff72c23936228880..5c883fe8e44016df1109e8f66dd73377dfecb5e9 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2342,11 +2342,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
  
         __sched_fork(clone_flags, p);
         /*
-        * We mark the process as running here. This guarantees that
+        * We mark the process as NEW here. This guarantees that
          * nobody will actually run it, and a signal or other external
          * event cannot wake it up and insert it on the runqueue either.
          */
-       p->state = TASK_RUNNING;
+       p->state = TASK_NEW;
  
         /*
          * Make sure we do not leak PI boosting priority to the child.
@@ -2383,8 +2383,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
                 p->sched_class = &fair_sched_class;
         }
  
-       if (p->sched_class->task_fork)
-               p->sched_class->task_fork(p);
+       init_entity_runnable_average(&p->se);
  
         /*
          * The child is not yet in the pid-hash so no cgroup attach races,
@@ -2394,7 +2393,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
          * Silence PROVE_RCU.
          */
         raw_spin_lock_irqsave(&p->pi_lock, flags);
-       set_task_cpu(p, cpu);
+       /*
+        * We're setting the cpu for the first time, we don't migrate,
+        * so use __set_task_cpu().
+        */
+       __set_task_cpu(p, cpu);
+       if (p->sched_class->task_fork)
+               p->sched_class->task_fork(p);
         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
  
  #ifdef CONFIG_SCHED_INFO
@@ -2526,16 +2531,18 @@ void wake_up_new_task(struct task_struct *p)
         struct rq_flags rf;
         struct rq *rq;
  
-       /* Initialize new task's runnable average */
-       init_entity_runnable_average(&p->se);
         raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
+       p->state = TASK_RUNNING;
  #ifdef CONFIG_SMP
         /*
          * Fork balancing, do it here and not earlier because:
          *  - cpus_allowed can change in the fork path
          *  - any previously selected cpu might disappear through hotplug
+        *
+        * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
+        * as we're not fully set-up yet.
          */
-       set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
+       __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
  #endif
         rq = __task_rq_lock(p, &rf);
         post_init_entity_util_avg(&p->se);
@@ -3161,6 +3168,9 @@ static noinline void __schedule_bug(struct task_struct *prev)
                 pr_cont("\n");
         }
  #endif
+       if (panic_on_warn)
+               panic("scheduling while atomic\n");
+
         dump_stack();
         add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
  }
@@ -4752,7 +4762,8 @@ out_unlock:
   * @len: length in bytes of the bitmask pointed to by user_mask_ptr
   * @user_mask_ptr: user-space pointer to hold the current cpu mask
   *
- * Return: 0 on success. An error code otherwise.
+ * Return: size of CPU mask copied to user_mask_ptr on success. An
+ * error code otherwise.
   */
  SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
                 unsigned long __user *, user_mask_ptr)
@@ -7233,7 +7244,6 @@ static void sched_rq_cpu_starting(unsigned int cpu)
         struct rq *rq = cpu_rq(cpu);
  
         rq->calc_load_update = calc_load_update;
-       account_reset_rq(rq);
         update_max_interval();
  }
  
@@ -7713,6 +7723,8 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
         INIT_LIST_HEAD(&tg->children);
         list_add_rcu(&tg->siblings, &parent->children);
         spin_unlock_irqrestore(&task_group_lock, flags);
+
+       online_fair_sched_group(tg);
  }
  
  /* rcu callback to free various structures associated with a task group */
@@ -7741,27 +7753,9 @@ void sched_offline_group(struct task_group *tg)
         spin_unlock_irqrestore(&task_group_lock, flags);
  }
  
-/* change task's runqueue when it moves between groups.
- *     The caller of this function should have put the task in its new group
- *     by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
- *     reflect its new group.
- */
-void sched_move_task(struct task_struct *tsk)
+static void sched_change_group(struct task_struct *tsk, int type)
  {
         struct task_group *tg;
-       int queued, running;
-       struct rq_flags rf;
-       struct rq *rq;
-
-       rq = task_rq_lock(tsk, &rf);
-
-       running = task_current(rq, tsk);
-       queued = task_on_rq_queued(tsk);
-
-       if (queued)
-               dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
-       if (unlikely(running))
-               put_prev_task(rq, tsk);
  
         /*
          * All callers are synchronized by task_rq_lock(); we do not use RCU
@@ -7774,11 +7768,37 @@ void sched_move_task(struct task_struct *tsk)
         tsk->sched_task_group = tg;
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
-       if (tsk->sched_class->task_move_group)
-               tsk->sched_class->task_move_group(tsk);
+       if (tsk->sched_class->task_change_group)
+               tsk->sched_class->task_change_group(tsk, type);
         else
  #endif
                 set_task_rq(tsk, task_cpu(tsk));
+}
+
+/*
+ * Change task's runqueue when it moves between groups.
+ *
+ * The caller of this function should have put the task in its new group by
+ * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect
+ * its new group.
+ */
+void sched_move_task(struct task_struct *tsk)
+{
+       int queued, running;
+       struct rq_flags rf;
+       struct rq *rq;
+
+       rq = task_rq_lock(tsk, &rf);
+
+       running = task_current(rq, tsk);
+       queued = task_on_rq_queued(tsk);
+
+       if (queued)
+               dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
+       if (unlikely(running))
+               put_prev_task(rq, tsk);
+
+       sched_change_group(tsk, TASK_MOVE_GROUP);
  
         if (unlikely(running))
                 tsk->sched_class->set_curr_task(rq);
@@ -8206,15 +8226,27 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
         sched_free_group(tg);
  }
  
+/*
+ * This is called before wake_up_new_task(), therefore we really only
+ * have to set its group bits, all the other stuff does not apply.
+ */
  static void cpu_cgroup_fork(struct task_struct *task)
  {
-       sched_move_task(task);
+       struct rq_flags rf;
+       struct rq *rq;
+
+       rq = task_rq_lock(task, &rf);
+
+       sched_change_group(task, TASK_SET_GROUP);
+
+       task_rq_unlock(rq, task, &rf);
  }
  
  static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
  {
         struct task_struct *task;
         struct cgroup_subsys_state *css;
+       int ret = 0;
  
         cgroup_taskset_for_each(task, css, tset) {
  #ifdef CONFIG_RT_GROUP_SCHED
@@ -8225,8 +8257,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
                 if (task->sched_class != &fair_sched_class)
                         return -EINVAL;
  #endif
+               /*
+                * Serialize against wake_up_new_task() such that if it's
+                * running, we're sure to observe its full state.
+                */
+               raw_spin_lock_irq(&task->pi_lock);
+               /*
+                * Avoid calling sched_move_task() before wake_up_new_task()
+                * has happened. This would lead to problems with PELT, due to
+                * move wanting to detach+attach while we're not attached yet.
+                */
+               if (task->state == TASK_NEW)
+                       ret = -EINVAL;
+               raw_spin_unlock_irq(&task->pi_lock);
+
+               if (ret)
+                       break;
         }
-       return 0;
+       return ret;
  }
  
  static void cpu_cgroup_attach(struct cgroup_taskset *tset)
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c

index 41f85c4d09387a8bd03299ef00f6e79482f68b40..bc0b309c3f19e2ce7e07b2d0419d9157d423497a 100644 (file)
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -25,15 +25,13 @@ enum cpuacct_stat_index {
         CPUACCT_STAT_NSTATS,
  };
  
-enum cpuacct_usage_index {
-       CPUACCT_USAGE_USER,     /* ... user mode */
-       CPUACCT_USAGE_SYSTEM,   /* ... kernel mode */
-
-       CPUACCT_USAGE_NRUSAGE,
+static const char * const cpuacct_stat_desc[] = {
+       [CPUACCT_STAT_USER] = "user",
+       [CPUACCT_STAT_SYSTEM] = "system",
  };
  
  struct cpuacct_usage {
-       u64     usages[CPUACCT_USAGE_NRUSAGE];
+       u64     usages[CPUACCT_STAT_NSTATS];
  };
  
  /* track cpu usage of a group of tasks and its child groups */
@@ -108,16 +106,16 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css)
  }
  
  static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
-                                enum cpuacct_usage_index index)
+                                enum cpuacct_stat_index index)
  {
         struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
         u64 data;
  
         /*
-        * We allow index == CPUACCT_USAGE_NRUSAGE here to read
+        * We allow index == CPUACCT_STAT_NSTATS here to read
          * the sum of suages.
          */
-       BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
+       BUG_ON(index > CPUACCT_STAT_NSTATS);
  
  #ifndef CONFIG_64BIT
         /*
@@ -126,11 +124,11 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
         raw_spin_lock_irq(&cpu_rq(cpu)->lock);
  #endif
  
-       if (index == CPUACCT_USAGE_NRUSAGE) {
+       if (index == CPUACCT_STAT_NSTATS) {
                 int i = 0;
  
                 data = 0;
-               for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
+               for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
                         data += cpuusage->usages[i];
         } else {
                 data = cpuusage->usages[index];
@@ -155,7 +153,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
         raw_spin_lock_irq(&cpu_rq(cpu)->lock);
  #endif
  
-       for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
+       for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
                 cpuusage->usages[i] = val;
  
  #ifndef CONFIG_64BIT
@@ -165,7 +163,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
  
  /* return total cpu usage (in nanoseconds) of a group */
  static u64 __cpuusage_read(struct cgroup_subsys_state *css,
-                          enum cpuacct_usage_index index)
+                          enum cpuacct_stat_index index)
  {
         struct cpuacct *ca = css_ca(css);
         u64 totalcpuusage = 0;
@@ -180,18 +178,18 @@ static u64 __cpuusage_read(struct cgroup_subsys_state *css,
  static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
                               struct cftype *cft)
  {
-       return __cpuusage_read(css, CPUACCT_USAGE_USER);
+       return __cpuusage_read(css, CPUACCT_STAT_USER);
  }
  
  static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
                              struct cftype *cft)
  {
-       return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM);
+       return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
  }
  
  static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
  {
-       return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE);
+       return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
  }
  
  static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -213,7 +211,7 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
  }
  
  static int __cpuacct_percpu_seq_show(struct seq_file *m,
-                                    enum cpuacct_usage_index index)
+                                    enum cpuacct_stat_index index)
  {
         struct cpuacct *ca = css_ca(seq_css(m));
         u64 percpu;
@@ -229,48 +227,78 @@ static int __cpuacct_percpu_seq_show(struct seq_file *m,
  
  static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
  {
-       return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER);
+       return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
  }
  
  static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
  {
-       return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM);
+       return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
  }
  
  static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
  {
-       return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
+       return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
  }
  
-static const char * const cpuacct_stat_desc[] = {
-       [CPUACCT_STAT_USER] = "user",
-       [CPUACCT_STAT_SYSTEM] = "system",
-};
+static int cpuacct_all_seq_show(struct seq_file *m, void *V)
+{
+       struct cpuacct *ca = css_ca(seq_css(m));
+       int index;
+       int cpu;
+
+       seq_puts(m, "cpu");
+       for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
+               seq_printf(m, " %s", cpuacct_stat_desc[index]);
+       seq_puts(m, "\n");
+
+       for_each_possible_cpu(cpu) {
+               struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+
+               seq_printf(m, "%d", cpu);
+
+               for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
+#ifndef CONFIG_64BIT
+                       /*
+                        * Take rq->lock to make 64-bit read safe on 32-bit
+                        * platforms.
+                        */
+                       raw_spin_lock_irq(&cpu_rq(cpu)->lock);
+#endif
+
+                       seq_printf(m, " %llu", cpuusage->usages[index]);
+
+#ifndef CONFIG_64BIT
+                       raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
+#endif
+               }
+               seq_puts(m, "\n");
+       }
+       return 0;
+}
  
  static int cpuacct_stats_show(struct seq_file *sf, void *v)
  {
         struct cpuacct *ca = css_ca(seq_css(sf));
+       s64 val[CPUACCT_STAT_NSTATS];
         int cpu;
-       s64 val = 0;
+       int stat;
  
+       memset(val, 0, sizeof(val));
         for_each_possible_cpu(cpu) {
-               struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
-               val += kcpustat->cpustat[CPUTIME_USER];
-               val += kcpustat->cpustat[CPUTIME_NICE];
-       }
-       val = cputime64_to_clock_t(val);
-       seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
+               u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
  
-       val = 0;
-       for_each_possible_cpu(cpu) {
-               struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
-               val += kcpustat->cpustat[CPUTIME_SYSTEM];
-               val += kcpustat->cpustat[CPUTIME_IRQ];
-               val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
+               val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
+               val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
+               val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
+               val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
+               val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
         }
  
-       val = cputime64_to_clock_t(val);
-       seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
+       for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
+               seq_printf(sf, "%s %lld\n",
+                          cpuacct_stat_desc[stat],
+                          cputime64_to_clock_t(val[stat]));
+       }
  
         return 0;
  }
@@ -301,6 +329,10 @@ static struct cftype files[] = {
                 .name = "usage_percpu_sys",
                 .seq_show = cpuacct_percpu_sys_seq_show,
         },
+       {
+               .name = "usage_all",
+               .seq_show = cpuacct_all_seq_show,
+       },
         {
                 .name = "stat",
                 .seq_show = cpuacct_stats_show,
@@ -316,11 +348,11 @@ static struct cftype files[] = {
  void cpuacct_charge(struct task_struct *tsk, u64 cputime)
  {
         struct cpuacct *ca;
-       int index = CPUACCT_USAGE_SYSTEM;
+       int index = CPUACCT_STAT_SYSTEM;
         struct pt_regs *regs = task_pt_regs(tsk);
  
         if (regs && user_mode(regs))
-               index = CPUACCT_USAGE_USER;
+               index = CPUACCT_STAT_USER;
  
         rcu_read_lock();
  
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c

index 75f98c5498d55d38b0a36bba2719907742166fe1..3d60e5d76fdb76bfe47389384c2505d2b43d3f0e 100644 (file)
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -257,7 +257,7 @@ void account_idle_time(cputime_t cputime)
                 cpustat[CPUTIME_IDLE] += (__force u64) cputime;
  }
  
-static __always_inline bool steal_account_process_tick(void)
+static __always_inline unsigned long steal_account_process_tick(unsigned long max_jiffies)
  {
  #ifdef CONFIG_PARAVIRT
         if (static_key_false(&paravirt_steal_enabled)) {
@@ -272,14 +272,14 @@ static __always_inline bool steal_account_process_tick(void)
                  * time in jiffies. Lets cast the result to jiffies
                  * granularity and account the rest on the next rounds.
                  */
-               steal_jiffies = nsecs_to_jiffies(steal);
+               steal_jiffies = min(nsecs_to_jiffies(steal), max_jiffies);
                 this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
  
                 account_steal_time(jiffies_to_cputime(steal_jiffies));
                 return steal_jiffies;
         }
  #endif
-       return false;
+       return 0;
  }
  
  /*
@@ -346,7 +346,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
         u64 cputime = (__force u64) cputime_one_jiffy;
         u64 *cpustat = kcpustat_this_cpu->cpustat;
  
-       if (steal_account_process_tick())
+       if (steal_account_process_tick(ULONG_MAX))
                 return;
  
         cputime *= ticks;
@@ -477,7 +477,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
                 return;
         }
  
-       if (steal_account_process_tick())
+       if (steal_account_process_tick(ULONG_MAX))
                 return;
  
         if (user_tick)
@@ -681,12 +681,14 @@ static cputime_t vtime_delta(struct task_struct *tsk)
  static cputime_t get_vtime_delta(struct task_struct *tsk)
  {
         unsigned long now = READ_ONCE(jiffies);
-       unsigned long delta = now - tsk->vtime_snap;
+       unsigned long delta_jiffies, steal_jiffies;
  
+       delta_jiffies = now - tsk->vtime_snap;
+       steal_jiffies = steal_account_process_tick(delta_jiffies);
         WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
         tsk->vtime_snap = now;
  
-       return jiffies_to_cputime(delta);
+       return jiffies_to_cputime(delta_jiffies - steal_jiffies);
  }
  
  static void __vtime_account_system(struct task_struct *tsk)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c

index 0368c393a3362d981e79745716cbb59b2989dac8..2a0a9995256d9e920d3c94cf944cd6acb61fa627 100644 (file)
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -879,9 +879,9 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
  
         nr_switches = p->nvcsw + p->nivcsw;
  
-#ifdef CONFIG_SCHEDSTATS
         P(se.nr_migrations);
  
+#ifdef CONFIG_SCHEDSTATS
         if (schedstat_enabled()) {
                 u64 avg_atom, avg_per_cpu;
  
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index c8c5d2d484249048cca7dd62fdfc38fbe7de2883..4088eedea7637859844c777dfa56dfb23136c142 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -690,6 +690,11 @@ void init_entity_runnable_average(struct sched_entity *se)
         /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
  }
  
+static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
+static int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq);
+static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force);
+static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se);
+
  /*
   * With new tasks being created, their initial util_avgs are extrapolated
   * based on the cfs_rq's current util_avg:
@@ -720,6 +725,8 @@ void post_init_entity_util_avg(struct sched_entity *se)
         struct cfs_rq *cfs_rq = cfs_rq_of(se);
         struct sched_avg *sa = &se->avg;
         long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
+       u64 now = cfs_rq_clock_task(cfs_rq);
+       int tg_update;
  
         if (cap > 0) {
                 if (cfs_rq->avg.util_avg != 0) {
@@ -733,16 +740,42 @@ void post_init_entity_util_avg(struct sched_entity *se)
                 }
                 sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
         }
+
+       if (entity_is_task(se)) {
+               struct task_struct *p = task_of(se);
+               if (p->sched_class != &fair_sched_class) {
+                       /*
+                        * For !fair tasks do:
+                        *
+                       update_cfs_rq_load_avg(now, cfs_rq, false);
+                       attach_entity_load_avg(cfs_rq, se);
+                       switched_from_fair(rq, p);
+                        *
+                        * such that the next switched_to_fair() has the
+                        * expected state.
+                        */
+                       se->avg.last_update_time = now;
+                       return;
+               }
+       }
+
+       tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+       attach_entity_load_avg(cfs_rq, se);
+       if (tg_update)
+               update_tg_load_avg(cfs_rq, false);
  }
  
-#else
+#else /* !CONFIG_SMP */
  void init_entity_runnable_average(struct sched_entity *se)
  {
  }
  void post_init_entity_util_avg(struct sched_entity *se)
  {
  }
-#endif
+static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
+{
+}
+#endif /* CONFIG_SMP */
  
  /*
   * Update the current task's runtime statistics.
@@ -1303,6 +1336,8 @@ static void task_numa_assign(struct task_numa_env *env,
  {
         if (env->best_task)
                 put_task_struct(env->best_task);
+       if (p)
+               get_task_struct(p);
  
         env->best_task = p;
         env->best_imp = imp;
@@ -1370,31 +1405,11 @@ static void task_numa_compare(struct task_numa_env *env,
         long imp = env->p->numa_group ? groupimp : taskimp;
         long moveimp = imp;
         int dist = env->dist;
-       bool assigned = false;
  
         rcu_read_lock();
-
-       raw_spin_lock_irq(&dst_rq->lock);
-       cur = dst_rq->curr;
-       /*
-        * No need to move the exiting task or idle task.
-        */
-       if ((cur->flags & PF_EXITING) || is_idle_task(cur))
+       cur = task_rcu_dereference(&dst_rq->curr);
+       if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
                 cur = NULL;
-       else {
-               /*
-                * The task_struct must be protected here to protect the
-                * p->numa_faults access in the task_weight since the
-                * numa_faults could already be freed in the following path:
-                * finish_task_switch()
-                *     --> put_task_struct()
-                *         --> __put_task_struct()
-                *             --> task_numa_free()
-                */
-               get_task_struct(cur);
-       }
-
-       raw_spin_unlock_irq(&dst_rq->lock);
  
         /*
          * Because we have preemption enabled we can get migrated around and
@@ -1477,7 +1492,6 @@ balance:
                  */
                 if (!load_too_imbalanced(src_load, dst_load, env)) {
                         imp = moveimp - 1;
-                       put_task_struct(cur);
                         cur = NULL;
                         goto assign;
                 }
@@ -1503,16 +1517,9 @@ balance:
                 env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
  
  assign:
-       assigned = true;
         task_numa_assign(env, cur, imp);
  unlock:
         rcu_read_unlock();
-       /*
-        * The dst_rq->curr isn't assigned. The protection for task_struct is
-        * finished.
-        */
-       if (cur && !assigned)
-               put_task_struct(cur);
  }
  
  static void task_numa_find_cpu(struct task_numa_env *env,
@@ -2866,8 +2873,6 @@ void set_task_rq_fair(struct sched_entity *se,
  static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  
-static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
-
  static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
  {
         struct rq *rq = rq_of(cfs_rq);
@@ -2914,7 +2919,23 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
         WRITE_ONCE(*ptr, res);                                  \
  } while (0)
  
-/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
+/**
+ * update_cfs_rq_load_avg - update the cfs_rq's load/util averages
+ * @now: current time, as per cfs_rq_clock_task()
+ * @cfs_rq: cfs_rq to update
+ * @update_freq: should we call cfs_rq_util_change() or will the caller do so
+ *
+ * The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
+ * avg. The immediate corollary is that all (fair) tasks must be attached, see
+ * post_init_entity_util_avg().
+ *
+ * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
+ *
+ * Returns true if the load decayed or we removed utilization. It is expected
+ * that one calls update_tg_load_avg() on this condition, but after you've
+ * modified the cfs_rq avg (attach/detach), such that we propagate the new
+ * avg up.
+ */
  static inline int
  update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
  {
@@ -2969,6 +2990,14 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
                 update_tg_load_avg(cfs_rq, 0);
  }
  
+/**
+ * attach_entity_load_avg - attach this entity to its cfs_rq load avg
+ * @cfs_rq: cfs_rq to attach to
+ * @se: sched_entity to attach
+ *
+ * Must call update_cfs_rq_load_avg() before this, since we rely on
+ * cfs_rq->avg.last_update_time being current.
+ */
  static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
         if (!sched_feat(ATTACH_AGE_LOAD))
@@ -2977,6 +3006,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
         /*
          * If we got migrated (either between CPUs or between cgroups) we'll
          * have aged the average right before clearing @last_update_time.
+        *
+        * Or we're fresh through post_init_entity_util_avg().
          */
         if (se->avg.last_update_time) {
                 __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
@@ -2998,6 +3029,14 @@ skip_aging:
         cfs_rq_util_change(cfs_rq);
  }
  
+/**
+ * detach_entity_load_avg - detach this entity from its cfs_rq load avg
+ * @cfs_rq: cfs_rq to detach from
+ * @se: sched_entity to detach
+ *
+ * Must call update_cfs_rq_load_avg() before this, since we rely on
+ * cfs_rq->avg.last_update_time being current.
+ */
  static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
         __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
@@ -3082,11 +3121,14 @@ void remove_entity_load_avg(struct sched_entity *se)
         u64 last_update_time;
  
         /*
-        * Newly created task or never used group entity should not be removed
-        * from its (source) cfs_rq
+        * tasks cannot exit without having gone through wake_up_new_task() ->
+        * post_init_entity_util_avg() which will have added things to the
+        * cfs_rq, so we can remove unconditionally.
+        *
+        * Similarly for groups, they will have passed through
+        * post_init_entity_util_avg() before unregister_fair_sched_group()
+        * calls this.
          */
-       if (se->avg.last_update_time == 0)
-               return;
  
         last_update_time = cfs_rq_last_update_time(cfs_rq);
  
@@ -3109,6 +3151,12 @@ static int idle_balance(struct rq *this_rq);
  
  #else /* CONFIG_SMP */
  
+static inline int
+update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
+{
+       return 0;
+}
+
  static inline void update_load_avg(struct sched_entity *se, int not_used)
  {
         struct cfs_rq *cfs_rq = cfs_rq_of(se);
@@ -3698,7 +3746,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
  static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
  {
         if (unlikely(cfs_rq->throttle_count))
-               return cfs_rq->throttled_clock_task;
+               return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
  
         return rq_clock_task(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
  }
@@ -3836,13 +3884,11 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
         struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
  
         cfs_rq->throttle_count--;
-#ifdef CONFIG_SMP
         if (!cfs_rq->throttle_count) {
                 /* adjust cfs_rq_clock_task() */
                 cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
                                              cfs_rq->throttled_clock_task;
         }
-#endif
  
         return 0;
  }
@@ -4195,26 +4241,6 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
         if (!cfs_bandwidth_used())
                 return;
  
-       /* Synchronize hierarchical throttle counter: */
-       if (unlikely(!cfs_rq->throttle_uptodate)) {
-               struct rq *rq = rq_of(cfs_rq);
-               struct cfs_rq *pcfs_rq;
-               struct task_group *tg;
-
-               cfs_rq->throttle_uptodate = 1;
-
-               /* Get closest up-to-date node, because leaves go first: */
-               for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
-                       pcfs_rq = tg->cfs_rq[cpu_of(rq)];
-                       if (pcfs_rq->throttle_uptodate)
-                               break;
-               }
-               if (tg) {
-                       cfs_rq->throttle_count = pcfs_rq->throttle_count;
-                       cfs_rq->throttled_clock_task = rq_clock_task(rq);
-               }
-       }
-
         /* an active group must be handled by the update_curr()->put() path */
         if (!cfs_rq->runtime_enabled || cfs_rq->curr)
                 return;
@@ -4229,6 +4255,23 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
                 throttle_cfs_rq(cfs_rq);
  }
  
+static void sync_throttle(struct task_group *tg, int cpu)
+{
+       struct cfs_rq *pcfs_rq, *cfs_rq;
+
+       if (!cfs_bandwidth_used())
+               return;
+
+       if (!tg->parent)
+               return;
+
+       cfs_rq = tg->cfs_rq[cpu];
+       pcfs_rq = tg->parent->cfs_rq[cpu];
+       /* Inherit the parent's throttle count; stamp *our own* throttle clock. */
+       cfs_rq->throttle_count = pcfs_rq->throttle_count;
+       cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+}
+
  /* conditionally throttle active cfs_rq's from put_prev_entity() */
  static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
  {
@@ -4368,6 +4411,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
  static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
  static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
  static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
+static inline void sync_throttle(struct task_group *tg, int cpu) {}
  static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
  
  static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
@@ -4476,7 +4520,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                  *
                  * note: in the case of encountering a throttled cfs_rq we will
                  * post the final h_nr_running increment below.
-               */
+                */
                 if (cfs_rq_throttled(cfs_rq))
                         break;
                 cfs_rq->h_nr_running++;
@@ -8317,31 +8361,17 @@ static void task_fork_fair(struct task_struct *p)
  {
         struct cfs_rq *cfs_rq;
         struct sched_entity *se = &p->se, *curr;
-       int this_cpu = smp_processor_id();
         struct rq *rq = this_rq();
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&rq->lock, flags);
  
+       raw_spin_lock(&rq->lock);
         update_rq_clock(rq);
  
         cfs_rq = task_cfs_rq(current);
         curr = cfs_rq->curr;
-
-       /*
-        * Not only the cpu but also the task_group of the parent might have
-        * been changed after parent->se.parent,cfs_rq were copied to
-        * child->se.parent,cfs_rq. So call __set_task_cpu() to make those
-        * of child point to valid ones.
-        */
-       rcu_read_lock();
-       __set_task_cpu(p, this_cpu);
-       rcu_read_unlock();
-
-       update_curr(cfs_rq);
-
-       if (curr)
+       if (curr) {
+               update_curr(cfs_rq);
                 se->vruntime = curr->vruntime;
+       }
         place_entity(cfs_rq, se, 1);
  
         if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
@@ -8354,8 +8384,7 @@ static void task_fork_fair(struct task_struct *p)
         }
  
         se->vruntime -= cfs_rq->min_vruntime;
-
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
+       raw_spin_unlock(&rq->lock);
  }
  
  /*
@@ -8411,6 +8440,8 @@ static void detach_task_cfs_rq(struct task_struct *p)
  {
         struct sched_entity *se = &p->se;
         struct cfs_rq *cfs_rq = cfs_rq_of(se);
+       u64 now = cfs_rq_clock_task(cfs_rq);
+       int tg_update;
  
         if (!vruntime_normalized(p)) {
                 /*
@@ -8422,13 +8453,18 @@ static void detach_task_cfs_rq(struct task_struct *p)
         }
  
         /* Catch up with the cfs_rq and remove our load when we leave */
+       tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
         detach_entity_load_avg(cfs_rq, se);
+       if (tg_update)
+               update_tg_load_avg(cfs_rq, false);
  }
  
  static void attach_task_cfs_rq(struct task_struct *p)
  {
         struct sched_entity *se = &p->se;
         struct cfs_rq *cfs_rq = cfs_rq_of(se);
+       u64 now = cfs_rq_clock_task(cfs_rq);
+       int tg_update;
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
         /*
@@ -8439,7 +8475,10 @@ static void attach_task_cfs_rq(struct task_struct *p)
  #endif
  
         /* Synchronize task with its cfs_rq */
+       tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
         attach_entity_load_avg(cfs_rq, se);
+       if (tg_update)
+               update_tg_load_avg(cfs_rq, false);
  
         if (!vruntime_normalized(p))
                 se->vruntime += cfs_rq->min_vruntime;
@@ -8499,6 +8538,14 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
  }
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
+static void task_set_group_fair(struct task_struct *p)
+{
+       struct sched_entity *se = &p->se;
+
+       set_task_rq(p, task_cpu(p));
+       se->depth = se->parent ? se->parent->depth + 1 : 0;
+}
+
  static void task_move_group_fair(struct task_struct *p)
  {
         detach_task_cfs_rq(p);
@@ -8511,6 +8558,19 @@ static void task_move_group_fair(struct task_struct *p)
         attach_task_cfs_rq(p);
  }
  
+static void task_change_group_fair(struct task_struct *p, int type)
+{
+       switch (type) {
+       case TASK_SET_GROUP:
+               task_set_group_fair(p);
+               break;
+
+       case TASK_MOVE_GROUP:
+               task_move_group_fair(p);
+               break;
+       }
+}
+
  void free_fair_sched_group(struct task_group *tg)
  {
         int i;
@@ -8562,10 +8622,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
                 init_cfs_rq(cfs_rq);
                 init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
                 init_entity_runnable_average(se);
-
-               raw_spin_lock_irq(&rq->lock);
-               post_init_entity_util_avg(se);
-               raw_spin_unlock_irq(&rq->lock);
         }
  
         return 1;
@@ -8576,6 +8632,23 @@ err:
         return 0;
  }
  
+void online_fair_sched_group(struct task_group *tg)
+{
+       struct sched_entity *se;
+       struct rq *rq;
+       int i;
+
+       for_each_possible_cpu(i) {
+               rq = cpu_rq(i);
+               se = tg->se[i];
+
+               raw_spin_lock_irq(&rq->lock);
+               post_init_entity_util_avg(se);
+               sync_throttle(tg, i);
+               raw_spin_unlock_irq(&rq->lock);
+       }
+}
+
  void unregister_fair_sched_group(struct task_group *tg)
  {
         unsigned long flags;
@@ -8680,6 +8753,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
         return 1;
  }
  
+void online_fair_sched_group(struct task_group *tg) { }
+
  void unregister_fair_sched_group(struct task_group *tg) { }
  
  #endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -8739,7 +8814,7 @@ const struct sched_class fair_sched_class = {
         .update_curr            = update_curr_fair,
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
-       .task_move_group        = task_move_group_fair,
+       .task_change_group      = task_change_group_fair,
  #endif
  };
  
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c

index c5aeedf4e93ad8f8f5c2edaf7248ac0ce9c47e8e..9fb873cfc75cfb332c59437bd936e1dd9493d92e 100644 (file)
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -201,6 +201,8 @@ exit_idle:
   */
  static void cpu_idle_loop(void)
  {
+       int cpu = smp_processor_id();
+
         while (1) {
                 /*
                  * If the arch has a polling bit, we maintain an invariant:
@@ -219,7 +221,7 @@ static void cpu_idle_loop(void)
                         check_pgt_cache();
                         rmb();
  
-                       if (cpu_is_offline(smp_processor_id())) {
+                       if (cpu_is_offline(cpu)) {
                                 cpuhp_report_idle_dead();
                                 arch_cpu_idle_dead();
                         }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 81283592942bf8f9954b65e5364fbcf03646da59..c64fc5114004f6a893a1bf942a49f3df1d6c8fea 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -321,6 +321,7 @@ extern int tg_nop(struct task_group *tg, void *data);
  
  extern void free_fair_sched_group(struct task_group *tg);
  extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
+extern void online_fair_sched_group(struct task_group *tg);
  extern void unregister_fair_sched_group(struct task_group *tg);
  extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
                         struct sched_entity *se, int cpu,
@@ -437,7 +438,7 @@ struct cfs_rq {
  
         u64 throttled_clock, throttled_clock_task;
         u64 throttled_clock_task_time;
-       int throttled, throttle_count, throttle_uptodate;
+       int throttled, throttle_count;
         struct list_head throttled_list;
  #endif /* CONFIG_CFS_BANDWIDTH */
  #endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -1246,8 +1247,11 @@ struct sched_class {
  
         void (*update_curr) (struct rq *rq);
  
+#define TASK_SET_GROUP  0
+#define TASK_MOVE_GROUP        1
+
  #ifdef CONFIG_FAIR_GROUP_SCHED
-       void (*task_move_group) (struct task_struct *p);
+       void (*task_change_group) (struct task_struct *p, int type);
  #endif
  };
  
@@ -1809,16 +1813,3 @@ static inline void cpufreq_trigger_update(u64 time) {}
  #else /* arch_scale_freq_capacity */
  #define arch_scale_freq_invariant()    (false)
  #endif
-
-static inline void account_reset_rq(struct rq *rq)
-{
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-       rq->prev_irq_time = 0;
-#endif
-#ifdef CONFIG_PARAVIRT
-       rq->prev_steal_time = 0;
-#endif
-#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
-       rq->prev_steal_time_rq = 0;
-#endif
-}
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 25 Jul 2016 20:59:34 +0000 (13:59 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 25 Jul 2016 20:59:34 +0000 (13:59 -0700)
arch/x86/kernel/kvm.c		patch \| blob \| blame \| history
include/linux/sched.h		patch \| blob \| blame \| history
kernel/exit.c		patch \| blob \| blame \| history
kernel/sched/core.c		patch \| blob \| blame \| history
kernel/sched/cpuacct.c		patch \| blob \| blame \| history
kernel/sched/cputime.c		patch \| blob \| blame \| history
kernel/sched/debug.c		patch \| blob \| blame \| history
kernel/sched/fair.c		patch \| blob \| blame \| history
kernel/sched/idle.c		patch \| blob \| blame \| history
kernel/sched/sched.h		patch \| blob \| blame \| history