sched/topology: Remove unused 'sd' parameter from arch_scale_cpu_capacity()

[linux-2.6-block.git] / kernel / sched / fair.c
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index f35930f5e528a8e1ca8e5f8ed5a6556c86a54701..4f8754157763c1ae491ece1a6488c97928eca258 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -764,7 +764,7 @@ void post_init_entity_util_avg(struct task_struct *p)
         struct sched_entity *se = &p->se;
         struct cfs_rq *cfs_rq = cfs_rq_of(se);
         struct sched_avg *sa = &se->avg;
-       long cpu_scale = arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
+       long cpu_scale = arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq)));
         long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2;
  
         if (cap > 0) {
@@ -1467,8 +1467,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
  }
  
  static unsigned long weighted_cpuload(struct rq *rq);
-static unsigned long source_load(int cpu, int type);
-static unsigned long target_load(int cpu, int type);
  
  /* Cached statistics for all CPUs within a node */
  struct numa_stats {
@@ -1621,7 +1619,7 @@ static void task_numa_compare(struct task_numa_env *env,
          * be incurred if the tasks were swapped.
          */
         /* Skip this swap candidate if cannot move to the source cpu */
-       if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed))
+       if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr))
                 goto unlock;
  
         /*
@@ -1718,7 +1716,7 @@ static void task_numa_find_cpu(struct task_numa_env *env,
  
         for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) {
                 /* Skip this CPU if the source task cannot migrate */
-               if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed))
+               if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
                         continue;
  
                 env->dst_cpu = cpu;
@@ -2686,8 +2684,6 @@ static void
  account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
         update_load_add(&cfs_rq->load, se->load.weight);
-       if (!parent_entity(se))
-               update_load_add(&rq_of(cfs_rq)->load, se->load.weight);
  #ifdef CONFIG_SMP
         if (entity_is_task(se)) {
                 struct rq *rq = rq_of(cfs_rq);
@@ -2703,8 +2699,6 @@ static void
  account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
         update_load_sub(&cfs_rq->load, se->load.weight);
-       if (!parent_entity(se))
-               update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
  #ifdef CONFIG_SMP
         if (entity_is_task(se)) {
                 account_numa_dequeue(rq_of(cfs_rq), task_of(se));
@@ -4100,7 +4094,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
          * least twice that of our own weight (i.e. dont track it
          * when there are only lesser-weight tasks around):
          */
-       if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
+       if (schedstat_enabled() &&
+           rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) {
                 schedstat_set(se->statistics.slice_max,
                         max((u64)schedstat_val(se->statistics.slice_max),
                             se->sum_exec_runtime - se->prev_sum_exec_runtime));
@@ -4734,6 +4729,11 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
         if (runtime_refresh_within(cfs_b, min_left))
                 return;
  
+       /* don't push forwards an existing deferred unthrottle */
+       if (cfs_b->slack_started)
+               return;
+       cfs_b->slack_started = true;
+
         hrtimer_start(&cfs_b->slack_timer,
                         ns_to_ktime(cfs_bandwidth_slack_period),
                         HRTIMER_MODE_REL);
@@ -4787,6 +4787,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
  
         /* confirm we're still not at a refresh boundary */
         raw_spin_lock_irqsave(&cfs_b->lock, flags);
+       cfs_b->slack_started = false;
         if (cfs_b->distribute_running) {
                 raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
                 return;
@@ -4950,6 +4951,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
         hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         cfs_b->slack_timer.function = sched_cfs_slack_timer;
         cfs_b->distribute_running = 0;
+       cfs_b->slack_started = false;
  }
  
  static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -5325,71 +5327,6 @@ DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
  DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
  
  #ifdef CONFIG_NO_HZ_COMMON
-/*
- * per rq 'load' arrray crap; XXX kill this.
- */
-
-/*
- * The exact cpuload calculated at every tick would be:
- *
- *   load' = (1 - 1/2^i) * load + (1/2^i) * cur_load
- *
- * If a CPU misses updates for n ticks (as it was idle) and update gets
- * called on the n+1-th tick when CPU may be busy, then we have:
- *
- *   load_n   = (1 - 1/2^i)^n * load_0
- *   load_n+1 = (1 - 1/2^i)   * load_n + (1/2^i) * cur_load
- *
- * decay_load_missed() below does efficient calculation of
- *
- *   load' = (1 - 1/2^i)^n * load
- *
- * Because x^(n+m) := x^n * x^m we can decompose any x^n in power-of-2 factors.
- * This allows us to precompute the above in said factors, thereby allowing the
- * reduction of an arbitrary n in O(log_2 n) steps. (See also
- * fixed_power_int())
- *
- * The calculation is approximated on a 128 point scale.
- */
-#define DEGRADE_SHIFT          7
-
-static const u8 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
-static const u8 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
-       {   0,   0,  0,  0,  0,  0, 0, 0 },
-       {  64,  32,  8,  0,  0,  0, 0, 0 },
-       {  96,  72, 40, 12,  1,  0, 0, 0 },
-       { 112,  98, 75, 43, 15,  1, 0, 0 },
-       { 120, 112, 98, 76, 45, 16, 2, 0 }
-};
-
-/*
- * Update cpu_load for any missed ticks, due to tickless idle. The backlog
- * would be when CPU is idle and so we just decay the old load without
- * adding any new load.
- */
-static unsigned long
-decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
-{
-       int j = 0;
-
-       if (!missed_updates)
-               return load;
-
-       if (missed_updates >= degrade_zero_ticks[idx])
-               return 0;
-
-       if (idx == 1)
-               return load >> missed_updates;
-
-       while (missed_updates) {
-               if (missed_updates % 2)
-                       load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
-
-               missed_updates >>= 1;
-               j++;
-       }
-       return load;
-}
  
  static struct {
         cpumask_var_t idle_cpus_mask;
@@ -5401,234 +5338,11 @@ static struct {
  
  #endif /* CONFIG_NO_HZ_COMMON */
  
-/**
- * __cpu_load_update - update the rq->cpu_load[] statistics
- * @this_rq: The rq to update statistics for
- * @this_load: The current load
- * @pending_updates: The number of missed updates
- *
- * Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC).
- *
- * This function computes a decaying average:
- *
- *   load[i]' = (1 - 1/2^i) * load[i] + (1/2^i) * load
- *
- * Because of NOHZ it might not get called on every tick which gives need for
- * the @pending_updates argument.
- *
- *   load[i]_n = (1 - 1/2^i) * load[i]_n-1 + (1/2^i) * load_n-1
- *             = A * load[i]_n-1 + B ; A := (1 - 1/2^i), B := (1/2^i) * load
- *             = A * (A * load[i]_n-2 + B) + B
- *             = A * (A * (A * load[i]_n-3 + B) + B) + B
- *             = A^3 * load[i]_n-3 + (A^2 + A + 1) * B
- *             = A^n * load[i]_0 + (A^(n-1) + A^(n-2) + ... + 1) * B
- *             = A^n * load[i]_0 + ((1 - A^n) / (1 - A)) * B
- *             = (1 - 1/2^i)^n * (load[i]_0 - load) + load
- *
- * In the above we've assumed load_n := load, which is true for NOHZ_FULL as
- * any change in load would have resulted in the tick being turned back on.
- *
- * For regular NOHZ, this reduces to:
- *
- *   load[i]_n = (1 - 1/2^i)^n * load[i]_0
- *
- * see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra
- * term.
- */
-static void cpu_load_update(struct rq *this_rq, unsigned long this_load,
-                           unsigned long pending_updates)
-{
-       unsigned long __maybe_unused tickless_load = this_rq->cpu_load[0];
-       int i, scale;
-
-       this_rq->nr_load_updates++;
-
-       /* Update our load: */
-       this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
-       for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
-               unsigned long old_load, new_load;
-
-               /* scale is effectively 1 << i now, and >> i divides by scale */
-
-               old_load = this_rq->cpu_load[i];
-#ifdef CONFIG_NO_HZ_COMMON
-               old_load = decay_load_missed(old_load, pending_updates - 1, i);
-               if (tickless_load) {
-                       old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
-                       /*
-                        * old_load can never be a negative value because a
-                        * decayed tickless_load cannot be greater than the
-                        * original tickless_load.
-                        */
-                       old_load += tickless_load;
-               }
-#endif
-               new_load = this_load;
-               /*
-                * Round up the averaging division if load is increasing. This
-                * prevents us from getting stuck on 9 if the load is 10, for
-                * example.
-                */
-               if (new_load > old_load)
-                       new_load += scale - 1;
-
-               this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
-       }
-}
-
-/* Used instead of source_load when we know the type == 0 */
  static unsigned long weighted_cpuload(struct rq *rq)
  {
         return cfs_rq_runnable_load_avg(&rq->cfs);
  }
  
-#ifdef CONFIG_NO_HZ_COMMON
-/*
- * There is no sane way to deal with nohz on smp when using jiffies because the
- * CPU doing the jiffies update might drift wrt the CPU doing the jiffy reading
- * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
- *
- * Therefore we need to avoid the delta approach from the regular tick when
- * possible since that would seriously skew the load calculation. This is why we
- * use cpu_load_update_periodic() for CPUs out of nohz. However we'll rely on
- * jiffies deltas for updates happening while in nohz mode (idle ticks, idle
- * loop exit, nohz_idle_balance, nohz full exit...)
- *
- * This means we might still be one tick off for nohz periods.
- */
-
-static void cpu_load_update_nohz(struct rq *this_rq,
-                                unsigned long curr_jiffies,
-                                unsigned long load)
-{
-       unsigned long pending_updates;
-
-       pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-       if (pending_updates) {
-               this_rq->last_load_update_tick = curr_jiffies;
-               /*
-                * In the regular NOHZ case, we were idle, this means load 0.
-                * In the NOHZ_FULL case, we were non-idle, we should consider
-                * its weighted load.
-                */
-               cpu_load_update(this_rq, load, pending_updates);
-       }
-}
-
-/*
- * Called from nohz_idle_balance() to update the load ratings before doing the
- * idle balance.
- */
-static void cpu_load_update_idle(struct rq *this_rq)
-{
-       /*
-        * bail if there's load or we're actually up-to-date.
-        */
-       if (weighted_cpuload(this_rq))
-               return;
-
-       cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0);
-}
-
-/*
- * Record CPU load on nohz entry so we know the tickless load to account
- * on nohz exit. cpu_load[0] happens then to be updated more frequently
- * than other cpu_load[idx] but it should be fine as cpu_load readers
- * shouldn't rely into synchronized cpu_load[*] updates.
- */
-void cpu_load_update_nohz_start(void)
-{
-       struct rq *this_rq = this_rq();
-
-       /*
-        * This is all lockless but should be fine. If weighted_cpuload changes
-        * concurrently we'll exit nohz. And cpu_load write can race with
-        * cpu_load_update_idle() but both updater would be writing the same.
-        */
-       this_rq->cpu_load[0] = weighted_cpuload(this_rq);
-}
-
-/*
- * Account the tickless load in the end of a nohz frame.
- */
-void cpu_load_update_nohz_stop(void)
-{
-       unsigned long curr_jiffies = READ_ONCE(jiffies);
-       struct rq *this_rq = this_rq();
-       unsigned long load;
-       struct rq_flags rf;
-
-       if (curr_jiffies == this_rq->last_load_update_tick)
-               return;
-
-       load = weighted_cpuload(this_rq);
-       rq_lock(this_rq, &rf);
-       update_rq_clock(this_rq);
-       cpu_load_update_nohz(this_rq, curr_jiffies, load);
-       rq_unlock(this_rq, &rf);
-}
-#else /* !CONFIG_NO_HZ_COMMON */
-static inline void cpu_load_update_nohz(struct rq *this_rq,
-                                       unsigned long curr_jiffies,
-                                       unsigned long load) { }
-#endif /* CONFIG_NO_HZ_COMMON */
-
-static void cpu_load_update_periodic(struct rq *this_rq, unsigned long load)
-{
-#ifdef CONFIG_NO_HZ_COMMON
-       /* See the mess around cpu_load_update_nohz(). */
-       this_rq->last_load_update_tick = READ_ONCE(jiffies);
-#endif
-       cpu_load_update(this_rq, load, 1);
-}
-
-/*
- * Called from scheduler_tick()
- */
-void cpu_load_update_active(struct rq *this_rq)
-{
-       unsigned long load = weighted_cpuload(this_rq);
-
-       if (tick_nohz_tick_stopped())
-               cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load);
-       else
-               cpu_load_update_periodic(this_rq, load);
-}
-
-/*
- * Return a low guess at the load of a migration-source CPU weighted
- * according to the scheduling class and "nice" value.
- *
- * We want to under-estimate the load of migration sources, to
- * balance conservatively.
- */
-static unsigned long source_load(int cpu, int type)
-{
-       struct rq *rq = cpu_rq(cpu);
-       unsigned long total = weighted_cpuload(rq);
-
-       if (type == 0 || !sched_feat(LB_BIAS))
-               return total;
-
-       return min(rq->cpu_load[type-1], total);
-}
-
-/*
- * Return a high guess at the load of a migration-target CPU weighted
- * according to the scheduling class and "nice" value.
- */
-static unsigned long target_load(int cpu, int type)
-{
-       struct rq *rq = cpu_rq(cpu);
-       unsigned long total = weighted_cpuload(rq);
-
-       if (type == 0 || !sched_feat(LB_BIAS))
-               return total;
-
-       return max(rq->cpu_load[type-1], total);
-}
-
  static unsigned long capacity_of(int cpu)
  {
         return cpu_rq(cpu)->cpu_capacity;
@@ -5736,7 +5450,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
         s64 this_eff_load, prev_eff_load;
         unsigned long task_load;
  
-       this_eff_load = target_load(this_cpu, sd->wake_idx);
+       this_eff_load = weighted_cpuload(cpu_rq(this_cpu));
  
         if (sync) {
                 unsigned long current_load = task_h_load(current);
@@ -5754,7 +5468,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
                 this_eff_load *= 100;
         this_eff_load *= capacity_of(prev_cpu);
  
-       prev_eff_load = source_load(prev_cpu, sd->wake_idx);
+       prev_eff_load = weighted_cpuload(cpu_rq(prev_cpu));
         prev_eff_load -= task_load;
         if (sched_feat(WA_BIAS))
                 prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
@@ -5815,14 +5529,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
         unsigned long this_runnable_load = ULONG_MAX;
         unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX;
         unsigned long most_spare = 0, this_spare = 0;
-       int load_idx = sd->forkexec_idx;
         int imbalance_scale = 100 + (sd->imbalance_pct-100)/2;
         unsigned long imbalance = scale_load_down(NICE_0_LOAD) *
                                 (sd->imbalance_pct-100) / 100;
  
-       if (sd_flag & SD_BALANCE_WAKE)
-               load_idx = sd->wake_idx;
-
         do {
                 unsigned long load, avg_load, runnable_load;
                 unsigned long spare_cap, max_spare_cap;
@@ -5831,7 +5541,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
  
                 /* Skip over this group if it has no CPUs allowed */
                 if (!cpumask_intersects(sched_group_span(group),
-                                       &p->cpus_allowed))
+                                       p->cpus_ptr))
                         continue;
  
                 local_group = cpumask_test_cpu(this_cpu,
@@ -5846,12 +5556,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                 max_spare_cap = 0;
  
                 for_each_cpu(i, sched_group_span(group)) {
-                       /* Bias balancing toward CPUs of our domain */
-                       if (local_group)
-                               load = source_load(i, load_idx);
-                       else
-                               load = target_load(i, load_idx);
-
+                       load = weighted_cpuload(cpu_rq(i));
                         runnable_load += load;
  
                         avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
@@ -5963,7 +5668,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
                 return cpumask_first(sched_group_span(group));
  
         /* Traverse only the allowed CPUs */
-       for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
+       for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
                 if (available_idle_cpu(i)) {
                         struct rq *rq = cpu_rq(i);
                         struct cpuidle_state *idle = idle_get_state(rq);
@@ -6003,7 +5708,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
  {
         int new_cpu = cpu;
  
-       if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
+       if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr))
                 return prev_cpu;
  
         /*
@@ -6120,7 +5825,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
         if (!test_idle_cores(target, false))
                 return -1;
  
-       cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
+       cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
  
         for_each_cpu_wrap(core, cpus, target) {
                 bool idle = true;
@@ -6154,7 +5859,7 @@ static int select_idle_smt(struct task_struct *p, int target)
                 return -1;
  
         for_each_cpu(cpu, cpu_smt_mask(target)) {
-               if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+               if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                         continue;
                 if (available_idle_cpu(cpu))
                         return cpu;
@@ -6217,7 +5922,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
         for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
                 if (!--nr)
                         return -1;
-               if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+               if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                         continue;
                 if (available_idle_cpu(cpu))
                         break;
@@ -6254,7 +5959,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
             recent_used_cpu != target &&
             cpus_share_cache(recent_used_cpu, target) &&
             available_idle_cpu(recent_used_cpu) &&
-           cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
+           cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
                 /*
                  * Replace recent_used_cpu with prev as it is a potential
                  * candidate for the next wake:
@@ -6600,7 +6305,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                 int max_spare_cap_cpu = -1;
  
                 for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
-                       if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+                       if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                                 continue;
  
                         /* Skip CPUs that will be overutilized. */
@@ -6689,7 +6394,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
                 }
  
                 want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) &&
-                             cpumask_test_cpu(cpu, &p->cpus_allowed);
+                             cpumask_test_cpu(cpu, p->cpus_ptr);
         }
  
         rcu_read_lock();
@@ -7445,14 +7150,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
         /*
          * We do not migrate tasks that are:
          * 1) throttled_lb_pair, or
-        * 2) cannot be migrated to this CPU due to cpus_allowed, or
+        * 2) cannot be migrated to this CPU due to cpus_ptr, or
          * 3) running (obviously), or
          * 4) are cache-hot on their current CPU.
          */
         if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
                 return 0;
  
-       if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) {
+       if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
                 int cpu;
  
                 schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
@@ -7472,7 +7177,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
  
                 /* Prevent to re-select dst_cpu via env's CPUs: */
                 for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
-                       if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
+                       if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
                                 env->flags |= LBF_DST_PINNED;
                                 env->new_dst_cpu = cpu;
                                 break;
@@ -7695,6 +7400,7 @@ static void attach_tasks(struct lb_env *env)
         rq_unlock(env->dst_rq, &rf);
  }
  
+#ifdef CONFIG_NO_HZ_COMMON
  static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq)
  {
         if (cfs_rq->avg.load_avg)
@@ -7722,6 +7428,19 @@ static inline bool others_have_blocked(struct rq *rq)
         return false;
  }
  
+static inline void update_blocked_load_status(struct rq *rq, bool has_blocked)
+{
+       rq->last_blocked_load_update_tick = jiffies;
+
+       if (!has_blocked)
+               rq->has_blocked_load = 0;
+}
+#else
+static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) { return false; }
+static inline bool others_have_blocked(struct rq *rq) { return false; }
+static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {}
+#endif
+
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
  static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
@@ -7787,11 +7506,7 @@ static void update_blocked_averages(int cpu)
         if (others_have_blocked(rq))
                 done = false;
  
-#ifdef CONFIG_NO_HZ_COMMON
-       rq->last_blocked_load_update_tick = jiffies;
-       if (done)
-               rq->has_blocked_load = 0;
-#endif
+       update_blocked_load_status(rq, !done);
         rq_unlock_irqrestore(rq, &rf);
  }
  
@@ -7857,11 +7572,7 @@ static inline void update_blocked_averages(int cpu)
         update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
         update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
         update_irq_load_avg(rq, 0);
-#ifdef CONFIG_NO_HZ_COMMON
-       rq->last_blocked_load_update_tick = jiffies;
-       if (!cfs_rq_has_blocked(cfs_rq) && !others_have_blocked(rq))
-               rq->has_blocked_load = 0;
-#endif
+       update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq));
         rq_unlock_irqrestore(rq, &rf);
  }
  
@@ -7879,7 +7590,6 @@ static unsigned long task_h_load(struct task_struct *p)
  struct sg_lb_stats {
         unsigned long avg_load; /*Avg load across the CPUs of the group */
         unsigned long group_load; /* Total load over the CPUs of the group */
-       unsigned long sum_weighted_load; /* Weighted load of group's tasks */
         unsigned long load_per_task;
         unsigned long group_capacity;
         unsigned long group_util; /* Total utilization of the group */
@@ -7933,38 +7643,10 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
         };
  }
  
-/**
- * get_sd_load_idx - Obtain the load index for a given sched domain.
- * @sd: The sched_domain whose load_idx is to be obtained.
- * @idle: The idle status of the CPU for whose sd load_idx is obtained.
- *
- * Return: The load index.
- */
-static inline int get_sd_load_idx(struct sched_domain *sd,
-                                       enum cpu_idle_type idle)
-{
-       int load_idx;
-
-       switch (idle) {
-       case CPU_NOT_IDLE:
-               load_idx = sd->busy_idx;
-               break;
-
-       case CPU_NEWLY_IDLE:
-               load_idx = sd->newidle_idx;
-               break;
-       default:
-               load_idx = sd->idle_idx;
-               break;
-       }
-
-       return load_idx;
-}
-
  static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
  {
         struct rq *rq = cpu_rq(cpu);
-       unsigned long max = arch_scale_cpu_capacity(sd, cpu);
+       unsigned long max = arch_scale_cpu_capacity(cpu);
         unsigned long used, free;
         unsigned long irq;
  
@@ -7989,7 +7671,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
         unsigned long capacity = scale_rt_capacity(sd, cpu);
         struct sched_group *sdg = sd->groups;
  
-       cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(sd, cpu);
+       cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
  
         if (!capacity)
                 capacity = 1;
@@ -8099,7 +7781,7 @@ static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
  
  /*
   * Group imbalance indicates (and tries to solve) the problem where balancing
- * groups is inadequate due to ->cpus_allowed constraints.
+ * groups is inadequate due to ->cpus_ptr constraints.
   *
   * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a
   * cpumask covering 1 CPU of the first group and 3 CPUs of the second group.
@@ -8249,9 +7931,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                                       struct sg_lb_stats *sgs,
                                       int *sg_status)
  {
-       int local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(group));
-       int load_idx = get_sd_load_idx(env->sd, env->idle);
-       unsigned long load;
         int i, nr_running;
  
         memset(sgs, 0, sizeof(*sgs));
@@ -8262,13 +7941,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                 if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
                         env->flags |= LBF_NOHZ_AGAIN;
  
-               /* Bias balancing toward CPUs of our domain: */
-               if (local_group)
-                       load = target_load(i, load_idx);
-               else
-                       load = source_load(i, load_idx);
-
-               sgs->group_load += load;
+               sgs->group_load += weighted_cpuload(rq);
                 sgs->group_util += cpu_util(i);
                 sgs->sum_nr_running += rq->cfs.h_nr_running;
  
@@ -8283,7 +7956,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                 sgs->nr_numa_running += rq->nr_numa_running;
                 sgs->nr_preferred_running += rq->nr_preferred_running;
  #endif
-               sgs->sum_weighted_load += weighted_cpuload(rq);
                 /*
                  * No need to call idle_cpu() if nr_running is not 0
                  */
@@ -8302,7 +7974,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
         sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
  
         if (sgs->sum_nr_running)
-               sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
+               sgs->load_per_task = sgs->group_load / sgs->sum_nr_running;
  
         sgs->group_weight = group->group_weight;
  
@@ -8768,7 +8440,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
         /*
          * If the busiest group is imbalanced the below checks don't
          * work because they assume all things are equal, which typically
-        * isn't true due to cpus_allowed constraints and the like.
+        * isn't true due to cpus_ptr constraints and the like.
          */
         if (busiest->group_type == group_imbalanced)
                 goto force_balance;
@@ -9210,7 +8882,7 @@ more_balance:
                          * if the curr task on busiest CPU can't be
                          * moved to this_cpu:
                          */
-                       if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
+                       if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
                                 raw_spin_unlock_irqrestore(&busiest->lock,
                                                             flags);
                                 env.flags |= LBF_ALL_PINNED;
@@ -9879,7 +9551,6 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
  
                         rq_lock_irqsave(rq, &rf);
                         update_rq_clock(rq);
-                       cpu_load_update_idle(rq);
                         rq_unlock_irqrestore(rq, &rf);
  
                         if (flags & NOHZ_BALANCE_KICK)