sched/core: Create task_has_idle_policy() helper

[linux-2.6-block.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index ad97f3ba5ec51c4a9379228b60416c72e6dc5b60..5afb868f7339de7b3e024187c9f85e80828325a3 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -135,9 +135,8 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
   * In theory, the compile should just see 0 here, and optimize out the call
   * to sched_rt_avg_update. But I don't trust it...
   */
-#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
-       s64 steal = 0, irq_delta = 0;
-#endif
+       s64 __maybe_unused steal = 0, irq_delta = 0;
+
  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
         irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
  
@@ -177,7 +176,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
  
         rq->clock_task += delta;
  
-#ifdef HAVE_SCHED_AVG_IRQ
+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
         if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
                 update_irq_load_avg(rq, irq_delta + steal);
  #endif
@@ -698,9 +697,10 @@ static void set_load_weight(struct task_struct *p, bool update_load)
         /*
          * SCHED_IDLE tasks get minimal weight:
          */
-       if (idle_policy(p->policy)) {
+       if (task_has_idle_policy(p)) {
                 load->weight = scale_load(WEIGHT_IDLEPRIO);
                 load->inv_weight = WMULT_IDLEPRIO;
+               p->se.runnable_weight = load->weight;
                 return;
         }
  
@@ -713,6 +713,7 @@ static void set_load_weight(struct task_struct *p, bool update_load)
         } else {
                 load->weight = scale_load(sched_prio_to_weight[prio]);
                 load->inv_weight = sched_prio_to_wmult[prio];
+               p->se.runnable_weight = load->weight;
         }
  }
  
@@ -721,8 +722,10 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
         if (!(flags & ENQUEUE_NOCLOCK))
                 update_rq_clock(rq);
  
-       if (!(flags & ENQUEUE_RESTORE))
+       if (!(flags & ENQUEUE_RESTORE)) {
                 sched_info_queued(rq, p);
+               psi_enqueue(p, flags & ENQUEUE_WAKEUP);
+       }
  
         p->sched_class->enqueue_task(rq, p, flags);
  }
@@ -732,8 +735,10 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
         if (!(flags & DEQUEUE_NOCLOCK))
                 update_rq_clock(rq);
  
-       if (!(flags & DEQUEUE_SAVE))
+       if (!(flags & DEQUEUE_SAVE)) {
                 sched_info_dequeued(rq, p);
+               psi_dequeue(p, flags & DEQUEUE_SLEEP);
+       }
  
         p->sched_class->dequeue_task(rq, p, flags);
  }
@@ -2036,6 +2041,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
         if (task_cpu(p) != cpu) {
                 wake_flags |= WF_MIGRATED;
+               psi_ttwu_dequeue(p);
                 set_task_cpu(p, cpu);
         }
  
@@ -2915,10 +2921,10 @@ unsigned long nr_iowait(void)
  }
  
  /*
- * Consumers of these two interfaces, like for example the cpufreq menu
- * governor are using nonsensical data. Boosting frequency for a CPU that has
- * IO-wait which might not even end up running the task when it does become
- * runnable.
+ * Consumers of these two interfaces, like for example the cpuidle menu
+ * governor, are using nonsensical data: they prefer shallow idle state
+ * selection for a CPU that has IO-wait, even though that CPU might not even
+ * end up running the task when it does become runnable.
   */
  
  unsigned long nr_iowait_cpu(int cpu)
@@ -3050,6 +3056,7 @@ void scheduler_tick(void)
         curr->sched_class->task_tick(rq, curr, 0);
         cpu_load_update_active(rq);
         calc_global_load_tick(rq);
+       psi_task_tick(rq);
  
         rq_unlock(rq, &rf);
  
@@ -4192,7 +4199,7 @@ recheck:
                  * Treat SCHED_IDLE as nice 20. Only allow a switch to
                  * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
                  */
-               if (idle_policy(p->policy) && !idle_policy(policy)) {
+               if (task_has_idle_policy(p) && !idle_policy(policy)) {
                         if (!can_nice(p, task_nice(p)))
                                 return -EPERM;
                 }
@@ -4932,9 +4939,7 @@ static void do_sched_yield(void)
         struct rq_flags rf;
         struct rq *rq;
  
-       local_irq_disable();
-       rq = this_rq();
-       rq_lock(rq, &rf);
+       rq = this_rq_lock_irq(&rf);
  
         schedstat_inc(rq->yld_count);
         current->sched_class->yield_task(rq);
@@ -5243,7 +5248,7 @@ out_unlock:
   * an error code.
   */
  SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
-               struct timespec __user *, interval)
+               struct __kernel_timespec __user *, interval)
  {
         struct timespec64 t;
         int retval = sched_rr_get_interval(pid, &t);
@@ -5254,16 +5259,16 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
         return retval;
  }
  
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
  COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
                        compat_pid_t, pid,
-                      struct compat_timespec __user *, interval)
+                      struct old_timespec32 __user *, interval)
  {
         struct timespec64 t;
         int retval = sched_rr_get_interval(pid, &t);
  
         if (retval == 0)
-               retval = compat_put_timespec64(&t, interval);
+               retval = put_old_timespec32(&t, interval);
         return retval;
  }
  #endif
@@ -5854,11 +5859,14 @@ void __init sched_init_smp(void)
         /*
          * There's no userspace yet to cause hotplug operations; hence all the
          * CPU masks are stable and all blatant races in the below code cannot
-        * happen.
+        * happen. The hotplug lock is nevertheless taken to satisfy lockdep,
+        * but there won't be any contention on it.
          */
+       cpus_read_lock();
         mutex_lock(&sched_domains_mutex);
         sched_init_domains(cpu_active_mask);
         mutex_unlock(&sched_domains_mutex);
+       cpus_read_unlock();
  
         /* Move init over to a non-isolated CPU */
         if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
@@ -6068,6 +6076,8 @@ void __init sched_init(void)
  
         init_schedstats();
  
+       psi_init();
+
         scheduler_running = 1;
  }