sysctl: pass kernel pointers to ->proc_handler
[linux-block.git] / kernel / sched / fair.c
index d7fb20adabeb3027133b135e82547549826d9b2b..b6077fd5b32f6f73dfd82b59623b4d53c09fc10b 100644 (file)
@@ -645,8 +645,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  */
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
-               void __user *buffer, size_t *lenp,
-               loff_t *ppos)
+               void *buffer, size_t *lenp, loff_t *ppos)
 {
        int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        unsigned int factor = get_update_sysctl_factor();
@@ -2799,7 +2798,7 @@ static void task_numa_work(struct callback_head *work)
                 * Skip inaccessible VMAs to avoid any confusion between
                 * PROT_NONE and NUMA hinting ptes
                 */
-               if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+               if (!vma_is_accessible(vma))
                        continue;
 
                do {
@@ -4836,11 +4835,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
                resched_curr(rq);
 }
 
-static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
+static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
 {
        struct cfs_rq *cfs_rq;
-       u64 runtime;
-       u64 starting_runtime = remaining;
+       u64 runtime, remaining = 1;
 
        rcu_read_lock();
        list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
@@ -4855,10 +4853,13 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
                /* By the above check, this should never be true */
                SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);
 
+               raw_spin_lock(&cfs_b->lock);
                runtime = -cfs_rq->runtime_remaining + 1;
-               if (runtime > remaining)
-                       runtime = remaining;
-               remaining -= runtime;
+               if (runtime > cfs_b->runtime)
+                       runtime = cfs_b->runtime;
+               cfs_b->runtime -= runtime;
+               remaining = cfs_b->runtime;
+               raw_spin_unlock(&cfs_b->lock);
 
                cfs_rq->runtime_remaining += runtime;
 
@@ -4873,8 +4874,6 @@ next:
                        break;
        }
        rcu_read_unlock();
-
-       return starting_runtime - remaining;
 }
 
 /*
@@ -4885,7 +4884,6 @@ next:
  */
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
 {
-       u64 runtime;
        int throttled;
 
        /* no need to continue the timer with no bandwidth constraint */
@@ -4914,24 +4912,17 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
        cfs_b->nr_throttled += overrun;
 
        /*
-        * This check is repeated as we are holding onto the new bandwidth while
-        * we unthrottle. This can potentially race with an unthrottled group
-        * trying to acquire new bandwidth from the global pool. This can result
-        * in us over-using our runtime if it is all used during this loop, but
-        * only by limited amounts in that extreme case.
+        * This check is repeated as we release cfs_b->lock while we unthrottle.
         */
        while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
-               runtime = cfs_b->runtime;
                cfs_b->distribute_running = 1;
                raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
                /* we can't nest cfs_b->lock while distributing bandwidth */
-               runtime = distribute_cfs_runtime(cfs_b, runtime);
+               distribute_cfs_runtime(cfs_b);
                raw_spin_lock_irqsave(&cfs_b->lock, flags);
 
                cfs_b->distribute_running = 0;
                throttled = !list_empty(&cfs_b->throttled_cfs_rq);
-
-               lsub_positive(&cfs_b->runtime, runtime);
        }
 
        /*
@@ -5065,10 +5056,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
        if (!runtime)
                return;
 
-       runtime = distribute_cfs_runtime(cfs_b, runtime);
+       distribute_cfs_runtime(cfs_b);
 
        raw_spin_lock_irqsave(&cfs_b->lock, flags);
-       lsub_positive(&cfs_b->runtime, runtime);
        cfs_b->distribute_running = 0;
        raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 }
@@ -6080,8 +6070,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
        struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
        struct sched_domain *this_sd;
        u64 avg_cost, avg_idle;
-       u64 time, cost;
-       s64 delta;
+       u64 time;
        int this = smp_processor_id();
        int cpu, nr = INT_MAX;
 
@@ -6119,9 +6108,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
        }
 
        time = cpu_clock(this) - time;
-       cost = this_sd->avg_scan_cost;
-       delta = (s64)(time - cost) / 8;
-       this_sd->avg_scan_cost += delta;
+       update_avg(&this_sd->avg_scan_cost, time);
 
        return cpu;
 }
@@ -9048,6 +9035,14 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 
                sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
                                sds->total_capacity;
+               /*
+                * If the local group is at least as loaded as the selected
+                * busiest group, don't try to pull any tasks.
+                */
+               if (local->avg_load >= busiest->avg_load) {
+                       env->imbalance = 0;
+                       return;
+               }
        }
 
        /*