Merge tag 'v3.17-rc4' into sched/core, to prevent conflicts with upcoming patches...
author Ingo Molnar <mingo@kernel.org>
Mon, 8 Sep 2014 06:11:07 +0000 (08:11 +0200)
committer Ingo Molnar <mingo@kernel.org>
Mon, 8 Sep 2014 06:11:34 +0000 (08:11 +0200)
Linux 3.17-rc4

kernel/sched/core.c

diff --combined kernel/sched/core.c
index 8d00f4a8c126232193d8e8725b68599cbb42cada,ec1a286684a56047a4352350edf4c0686e4ba70e..a814b3c8802964fef99d4a245e0328106625d946
@@@ -333,12 -333,9 +333,12 @@@ static inline struct rq *__task_rq_lock
        for (;;) {
                rq = task_rq(p);
                raw_spin_lock(&rq->lock);
 -              if (likely(rq == task_rq(p)))
 +              if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
                        return rq;
                raw_spin_unlock(&rq->lock);
 +
 +              while (unlikely(task_on_rq_migrating(p)))
 +                      cpu_relax();
        }
  }
  
@@@ -355,13 -352,10 +355,13 @@@ static struct rq *task_rq_lock(struct t
                raw_spin_lock_irqsave(&p->pi_lock, *flags);
                rq = task_rq(p);
                raw_spin_lock(&rq->lock);
 -              if (likely(rq == task_rq(p)))
 +              if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
                        return rq;
                raw_spin_unlock(&rq->lock);
                raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
 +
 +              while (unlikely(task_on_rq_migrating(p)))
 +                      cpu_relax();
        }
  }
  
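Note: the two locking loops above now back off while a task is in flight between runqueues. The task_on_rq_queued()/task_on_rq_migrating() helpers and the TASK_ON_RQ_* states are not defined in this file; as an assumption, they come from kernel/sched/sched.h in the same series and reduce to comparisons against the new p->on_rq values, roughly:

/* Presumed kernel/sched/sched.h helpers (not shown in this diff):
 * p->on_rq is no longer a plain 0/1 flag but one of these states. */
#define TASK_ON_RQ_QUEUED	1	/* queued on its runqueue */
#define TASK_ON_RQ_MIGRATING	2	/* dequeued, being moved to another rq */

static inline int task_on_rq_queued(struct task_struct *p)
{
	return p->on_rq == TASK_ON_RQ_QUEUED;
}

static inline int task_on_rq_migrating(struct task_struct *p)
{
	return p->on_rq == TASK_ON_RQ_MIGRATING;
}
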
@@@ -455,15 -449,7 +455,15 @@@ static void __hrtick_start(void *arg
  void hrtick_start(struct rq *rq, u64 delay)
  {
        struct hrtimer *timer = &rq->hrtick_timer;
 -      ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
 +      ktime_t time;
 +      s64 delta;
 +
 +      /*
 +       * Don't schedule slices shorter than 10000ns, that just
 +       * doesn't make sense and can cause timer DoS.
 +       */
 +      delta = max_t(s64, delay, 10000LL);
 +      time = ktime_add_ns(timer->base->get_time(), delta);
  
        hrtimer_set_expires(timer, time);
  
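Note: the clamp above rounds any requested slice shorter than 10 us up to 10 us before arming the hrtimer. A worked illustration of the assumed max_t() behaviour (not part of the diff):

	delta = max_t(s64, 3000LL, 10000LL);	/* -> 10000 ns: too-short slice bumped up */
	delta = max_t(s64, 250000LL, 10000LL);	/* -> 250000 ns: longer slices pass through */
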
@@@ -1057,7 -1043,7 +1057,7 @@@ void check_preempt_curr(struct rq *rq, 
         * A queue event has occurred, and we're going to schedule.  In
         * this case, we can save a useless back to back clock update.
         */
 -      if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
 +      if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
                rq->skip_clock_update = 1;
  }
  
@@@ -1102,7 -1088,7 +1102,7 @@@ void set_task_cpu(struct task_struct *p
  
  static void __migrate_swap_task(struct task_struct *p, int cpu)
  {
 -      if (p->on_rq) {
 +      if (task_on_rq_queued(p)) {
                struct rq *src_rq, *dst_rq;
  
                src_rq = task_rq(p);
@@@ -1228,7 -1214,7 +1228,7 @@@ static int migration_cpu_stop(void *dat
  unsigned long wait_task_inactive(struct task_struct *p, long match_state)
  {
        unsigned long flags;
 -      int running, on_rq;
 +      int running, queued;
        unsigned long ncsw;
        struct rq *rq;
  
                rq = task_rq_lock(p, &flags);
                trace_sched_wait_task(p);
                running = task_running(rq, p);
 -              on_rq = p->on_rq;
 +              queued = task_on_rq_queued(p);
                ncsw = 0;
                if (!match_state || p->state == match_state)
                        ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
                 * running right now), it's preempted, and we should
                 * yield - it could be a while.
                 */
 -              if (unlikely(on_rq)) {
 +              if (unlikely(queued)) {
                        ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
  
                        set_current_state(TASK_UNINTERRUPTIBLE);
@@@ -1492,7 -1478,7 +1492,7 @@@ ttwu_stat(struct task_struct *p, int cp
  static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
  {
        activate_task(rq, p, en_flags);
 -      p->on_rq = 1;
 +      p->on_rq = TASK_ON_RQ_QUEUED;
  
        /* if a worker is waking up, notify workqueue */
        if (p->flags & PF_WQ_WORKER)
@@@ -1551,7 -1537,7 +1551,7 @@@ static int ttwu_remote(struct task_stru
        int ret = 0;
  
        rq = __task_rq_lock(p);
 -      if (p->on_rq) {
 +      if (task_on_rq_queued(p)) {
                /* check_preempt_curr() may use rq clock */
                update_rq_clock(rq);
                ttwu_do_wakeup(rq, p, wake_flags);
@@@ -1756,7 -1742,7 +1756,7 @@@ static void try_to_wake_up_local(struc
        if (!(p->state & TASK_NORMAL))
                goto out;
  
 -      if (!p->on_rq)
 +      if (!task_on_rq_queued(p))
                ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  
        ttwu_do_wakeup(rq, p, 0);
@@@ -2109,7 -2095,7 +2109,7 @@@ void wake_up_new_task(struct task_struc
        init_task_runnable_average(p);
        rq = __task_rq_lock(p);
        activate_task(rq, p, 0);
 -      p->on_rq = 1;
 +      p->on_rq = TASK_ON_RQ_QUEUED;
        trace_sched_wakeup_new(p, true);
        check_preempt_curr(rq, p, WF_FORK);
  #ifdef CONFIG_SMP
@@@ -2407,6 -2393,13 +2407,13 @@@ unsigned long nr_iowait_cpu(int cpu
        return atomic_read(&this->nr_iowait);
  }
  
+ void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
+ {
+       struct rq *this = this_rq();
+       *nr_waiters = atomic_read(&this->nr_iowait);
+       *load = this->cpu_load[0];
+ }
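Note: the new get_iowait_load() samples the local runqueue only, so callers must already be running on the CPU they are asking about. A hypothetical caller sketch, not part of this diff:

/* Hypothetical caller (illustration only): both outputs come from this_rq(),
 * so this must execute in the context of the CPU being inspected, e.g. from
 * a governor path running on that CPU. */
static unsigned long local_iowait_weighted_load(void)
{
	unsigned long nr_waiters, load;

	get_iowait_load(&nr_waiters, &load);
	return nr_waiters ? load : 0;
}
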
  #ifdef CONFIG_SMP
  
  /*
@@@ -2458,7 -2451,7 +2465,7 @@@ static u64 do_task_delta_exec(struct ta
         * project cycles that may never be accounted to this
         * thread, breaking clock_gettime().
         */
 -      if (task_current(rq, p) && p->on_rq) {
 +      if (task_current(rq, p) && task_on_rq_queued(p)) {
                update_rq_clock(rq);
                ns = rq_clock_task(rq) - p->se.exec_start;
                if ((s64)ns < 0)
@@@ -2504,7 -2497,7 +2511,7 @@@ unsigned long long task_sched_runtime(s
         * If we see ->on_cpu without ->on_rq, the task is leaving, and has
         * been accounted, so we're correct here as well.
         */
 -      if (!p->on_cpu || !p->on_rq)
 +      if (!p->on_cpu || !task_on_rq_queued(p))
                return p->se.sum_exec_runtime;
  #endif
  
@@@ -2808,7 -2801,7 +2815,7 @@@ need_resched
                switch_count = &prev->nvcsw;
        }
  
 -      if (prev->on_rq || rq->skip_clock_update < 0)
 +      if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
                update_rq_clock(rq);
  
        next = pick_next_task(rq, prev);
@@@ -2973,7 -2966,7 +2980,7 @@@ EXPORT_SYMBOL(default_wake_function)
   */
  void rt_mutex_setprio(struct task_struct *p, int prio)
  {
 -      int oldprio, on_rq, running, enqueue_flag = 0;
 +      int oldprio, queued, running, enqueue_flag = 0;
        struct rq *rq;
        const struct sched_class *prev_class;
  
        trace_sched_pi_setprio(p, prio);
        oldprio = p->prio;
        prev_class = p->sched_class;
 -      on_rq = p->on_rq;
 +      queued = task_on_rq_queued(p);
        running = task_current(rq, p);
 -      if (on_rq)
 +      if (queued)
                dequeue_task(rq, p, 0);
        if (running)
                p->sched_class->put_prev_task(rq, p);
  
        if (running)
                p->sched_class->set_curr_task(rq);
 -      if (on_rq)
 +      if (queued)
                enqueue_task(rq, p, enqueue_flag);
  
        check_class_changed(rq, p, prev_class, oldprio);
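Note: rt_mutex_setprio() above, and set_user_nice(), __sched_setscheduler(), sched_setnuma(), normalize_task() and sched_move_task() below, all repeat the same dequeue/put_prev then set_curr/enqueue dance, now keyed on task_on_rq_queued(). A condensed sketch of that shared shape, assembled from the calls visible in the hunks (an illustration, not code from this diff):

/* Sketch of the pattern these call sites share: take the task off the
 * runqueue and off the CPU, apply the change, then put it back. */
static void change_queued_task(struct rq *rq, struct task_struct *p,
			       void (*apply)(struct rq *, struct task_struct *))
{
	bool queued = task_on_rq_queued(p);
	bool running = task_current(rq, p);

	if (queued)
		dequeue_task(rq, p, 0);
	if (running)
		p->sched_class->put_prev_task(rq, p);

	apply(rq, p);			/* prio, nice, policy, group, ... */

	if (running)
		p->sched_class->set_curr_task(rq);
	if (queued)
		enqueue_task(rq, p, 0);
}
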
@@@ -3055,7 -3048,7 +3062,7 @@@ out_unlock
  
  void set_user_nice(struct task_struct *p, long nice)
  {
 -      int old_prio, delta, on_rq;
 +      int old_prio, delta, queued;
        unsigned long flags;
        struct rq *rq;
  
                p->static_prio = NICE_TO_PRIO(nice);
                goto out_unlock;
        }
 -      on_rq = p->on_rq;
 -      if (on_rq)
 +      queued = task_on_rq_queued(p);
 +      if (queued)
                dequeue_task(rq, p, 0);
  
        p->static_prio = NICE_TO_PRIO(nice);
        p->prio = effective_prio(p);
        delta = p->prio - old_prio;
  
 -      if (on_rq) {
 +      if (queued) {
                enqueue_task(rq, p, 0);
                /*
                 * If the task increased its priority or is running and
@@@ -3358,7 -3351,7 +3365,7 @@@ static int __sched_setscheduler(struct 
  {
        int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
                      MAX_RT_PRIO - 1 - attr->sched_priority;
 -      int retval, oldprio, oldpolicy = -1, on_rq, running;
 +      int retval, oldprio, oldpolicy = -1, queued, running;
        int policy = attr->sched_policy;
        unsigned long flags;
        const struct sched_class *prev_class;
@@@ -3555,9 -3548,9 +3562,9 @@@ change
                return 0;
        }
  
 -      on_rq = p->on_rq;
 +      queued = task_on_rq_queued(p);
        running = task_current(rq, p);
 -      if (on_rq)
 +      if (queued)
                dequeue_task(rq, p, 0);
        if (running)
                p->sched_class->put_prev_task(rq, p);
  
        if (running)
                p->sched_class->set_curr_task(rq);
 -      if (on_rq) {
 +      if (queued) {
                /*
                 * We enqueue to tail when the priority of a task is
                 * increased (user space view).
@@@ -4519,7 -4512,7 +4526,7 @@@ void show_state_filter(unsigned long st
                "  task                        PC stack   pid father\n");
  #endif
        rcu_read_lock();
 -      do_each_thread(g, p) {
 +      for_each_process_thread(g, p) {
                /*
                 * reset the NMI-timeout, listing all files on a slow
                 * console might take a lot of time:
                touch_nmi_watchdog();
                if (!state_filter || (p->state & state_filter))
                        sched_show_task(p);
 -      } while_each_thread(g, p);
 +      }
  
        touch_all_softlockup_watchdogs();
  
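Note: the do_each_thread()/while_each_thread() pairs here and below are replaced by the flat for_each_process_thread() iterator; its presumed definition (include/linux/sched.h, not shown in this diff) is simply:

/* Presumed definition: walk every thread of every process; callers still
 * provide the RCU or tasklist_lock protection seen in the hunks. */
#define for_each_process_thread(p, t)	\
	for_each_process(p) for_each_thread(p, t)
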
@@@ -4582,7 -4575,7 +4589,7 @@@ void init_idle(struct task_struct *idle
        rcu_read_unlock();
  
        rq->curr = rq->idle = idle;
 -      idle->on_rq = 1;
 +      idle->on_rq = TASK_ON_RQ_QUEUED;
  #if defined(CONFIG_SMP)
        idle->on_cpu = 1;
  #endif
@@@ -4659,7 -4652,7 +4666,7 @@@ int set_cpus_allowed_ptr(struct task_st
                goto out;
  
        dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
 -      if (p->on_rq) {
 +      if (task_on_rq_queued(p)) {
                struct migration_arg arg = { p, dest_cpu };
                /* Need help from migration thread: drop lock and wait. */
                task_rq_unlock(rq, p, &flags);
@@@ -4687,20 -4680,20 +4694,20 @@@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr)
   */
  static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
  {
 -      struct rq *rq_dest, *rq_src;
 +      struct rq *rq;
        int ret = 0;
  
        if (unlikely(!cpu_active(dest_cpu)))
                return ret;
  
 -      rq_src = cpu_rq(src_cpu);
 -      rq_dest = cpu_rq(dest_cpu);
 +      rq = cpu_rq(src_cpu);
  
        raw_spin_lock(&p->pi_lock);
 -      double_rq_lock(rq_src, rq_dest);
 +      raw_spin_lock(&rq->lock);
        /* Already moved. */
        if (task_cpu(p) != src_cpu)
                goto done;
 +
        /* Affinity changed (again). */
        if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
                goto fail;
         * If we're not on a rq, the next wake-up will ensure we're
         * placed properly.
         */
 -      if (p->on_rq) {
 -              dequeue_task(rq_src, p, 0);
 +      if (task_on_rq_queued(p)) {
 +              dequeue_task(rq, p, 0);
 +              p->on_rq = TASK_ON_RQ_MIGRATING;
                set_task_cpu(p, dest_cpu);
 -              enqueue_task(rq_dest, p, 0);
 -              check_preempt_curr(rq_dest, p, 0);
 +              raw_spin_unlock(&rq->lock);
 +
 +              rq = cpu_rq(dest_cpu);
 +              raw_spin_lock(&rq->lock);
 +              BUG_ON(task_rq(p) != rq);
 +              p->on_rq = TASK_ON_RQ_QUEUED;
 +              enqueue_task(rq, p, 0);
 +              check_preempt_curr(rq, p, 0);
        }
  done:
        ret = 1;
  fail:
 -      double_rq_unlock(rq_src, rq_dest);
 +      raw_spin_unlock(&rq->lock);
        raw_spin_unlock(&p->pi_lock);
        return ret;
  }
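Note: the rewritten __migrate_task() above drops double_rq_lock() in favour of a handover through the TASK_ON_RQ_MIGRATING window, so only one rq lock is held at a time. A condensed timeline of how that window is meant to pair with the rq-lock loops at the top of the file (a sketch assembled from the hunks, not new kernel code):

/*
 *   __migrate_task()                        __task_rq_lock()/task_rq_lock()
 *   ----------------                        -------------------------------
 *   raw_spin_lock(&src->lock)
 *   dequeue_task(src, p, 0)
 *   p->on_rq = TASK_ON_RQ_MIGRATING
 *   set_task_cpu(p, dest_cpu)
 *   raw_spin_unlock(&src->lock)
 *                                           lock task_rq(p)->lock, see
 *                                           task_on_rq_migrating(p), unlock,
 *                                           spin with cpu_relax()
 *   raw_spin_lock(&dst->lock)
 *   p->on_rq = TASK_ON_RQ_QUEUED
 *   enqueue_task(dst, p, 0)
 *   check_preempt_curr(dst, p, 0)
 *   raw_spin_unlock(&dst->lock)
 *                                           retry now succeeds on a stable rq
 */
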
@@@ -4757,13 -4743,13 +4764,13 @@@ void sched_setnuma(struct task_struct *
  {
        struct rq *rq;
        unsigned long flags;
 -      bool on_rq, running;
 +      bool queued, running;
  
        rq = task_rq_lock(p, &flags);
 -      on_rq = p->on_rq;
 +      queued = task_on_rq_queued(p);
        running = task_current(rq, p);
  
 -      if (on_rq)
 +      if (queued)
                dequeue_task(rq, p, 0);
        if (running)
                p->sched_class->put_prev_task(rq, p);
  
        if (running)
                p->sched_class->set_curr_task(rq);
 -      if (on_rq)
 +      if (queued)
                enqueue_task(rq, p, 0);
        task_rq_unlock(rq, p, &flags);
  }
@@@ -5760,7 -5746,7 +5767,7 @@@ build_overlap_sched_groups(struct sched
        const struct cpumask *span = sched_domain_span(sd);
        struct cpumask *covered = sched_domains_tmpmask;
        struct sd_data *sdd = sd->private;
 -      struct sched_domain *child;
 +      struct sched_domain *sibling;
        int i;
  
        cpumask_clear(covered);
                if (cpumask_test_cpu(i, covered))
                        continue;
  
 -              child = *per_cpu_ptr(sdd->sd, i);
 +              sibling = *per_cpu_ptr(sdd->sd, i);
  
                /* See the comment near build_group_mask(). */
 -              if (!cpumask_test_cpu(i, sched_domain_span(child)))
 +              if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
                        continue;
  
                sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
                        goto fail;
  
                sg_span = sched_group_cpus(sg);
 -              if (child->child) {
 -                      child = child->child;
 -                      cpumask_copy(sg_span, sched_domain_span(child));
 -              } else
 +              if (sibling->child)
 +                      cpumask_copy(sg_span, sched_domain_span(sibling->child));
 +              else
                        cpumask_set_cpu(i, sg_span);
  
                cpumask_or(covered, covered, sg_span);
@@@ -7137,13 -7124,13 +7144,13 @@@ static void normalize_task(struct rq *r
                .sched_policy = SCHED_NORMAL,
        };
        int old_prio = p->prio;
 -      int on_rq;
 +      int queued;
  
 -      on_rq = p->on_rq;
 -      if (on_rq)
 +      queued = task_on_rq_queued(p);
 +      if (queued)
                dequeue_task(rq, p, 0);
        __setscheduler(rq, p, &attr);
 -      if (on_rq) {
 +      if (queued) {
                enqueue_task(rq, p, 0);
                resched_curr(rq);
        }
@@@ -7158,7 -7145,7 +7165,7 @@@ void normalize_rt_tasks(void
        struct rq *rq;
  
        read_lock_irqsave(&tasklist_lock, flags);
 -      do_each_thread(g, p) {
 +      for_each_process_thread(g, p) {
                /*
                 * Only normalize user tasks:
                 */
  
                __task_rq_unlock(rq);
                raw_spin_unlock(&p->pi_lock);
 -      } while_each_thread(g, p);
 -
 +      }
        read_unlock_irqrestore(&tasklist_lock, flags);
  }
  
@@@ -7330,16 -7318,16 +7337,16 @@@ void sched_offline_group(struct task_gr
  void sched_move_task(struct task_struct *tsk)
  {
        struct task_group *tg;
 -      int on_rq, running;
 +      int queued, running;
        unsigned long flags;
        struct rq *rq;
  
        rq = task_rq_lock(tsk, &flags);
  
        running = task_current(rq, tsk);
 -      on_rq = tsk->on_rq;
 +      queued = task_on_rq_queued(tsk);
  
 -      if (on_rq)
 +      if (queued)
                dequeue_task(rq, tsk, 0);
        if (unlikely(running))
                tsk->sched_class->put_prev_task(rq, tsk);
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
        if (tsk->sched_class->task_move_group)
 -              tsk->sched_class->task_move_group(tsk, on_rq);
 +              tsk->sched_class->task_move_group(tsk, queued);
        else
  #endif
                set_task_rq(tsk, task_cpu(tsk));
  
        if (unlikely(running))
                tsk->sched_class->set_curr_task(rq);
 -      if (on_rq)
 +      if (queued)
                enqueue_task(rq, tsk, 0);
  
        task_rq_unlock(rq, tsk, &flags);
@@@ -7377,10 -7365,10 +7384,10 @@@ static inline int tg_has_rt_tasks(struc
  {
        struct task_struct *g, *p;
  
 -      do_each_thread(g, p) {
 +      for_each_process_thread(g, p) {
                if (rt_task(p) && task_rq(p)->rt.tg == tg)
                        return 1;
 -      } while_each_thread(g, p);
 +      }
  
        return 0;
  }