Merge tag 'sched-urgent-2024-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author: Linus Torvalds <torvalds@linux-foundation.org>
Sun, 28 Apr 2024 19:11:26 +0000 (12:11 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Sun, 28 Apr 2024 19:11:26 +0000 (12:11 -0700)
Pull scheduler fixes from Ingo Molnar:

 - Fix EEVDF corner cases

 - Fix two nohz_full= related bugs that can cause boot crashes
   and warnings

* tag 'sched-urgent-2024-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/isolation: Fix boot crash when maxcpus < first housekeeping CPU
  sched/isolation: Prevent boot crash when the boot CPU is nohz_full
  sched/eevdf: Prevent vlag from going out of bounds in reweight_eevdf()
  sched/eevdf: Fix miscalculation in reweight_entity() when se is not curr
  sched/eevdf: Always update V if se->on_rq when reweighting

Documentation/timers/no_hz.rst
kernel/sched/fair.c
kernel/sched/isolation.c

index f8786be15183c15879a188f61fa36bceda99c6e9..7fe8ef9718d8e357567671295a3cc4dc1c97751a 100644 (file)
@@ -129,11 +129,8 @@ adaptive-tick CPUs:  At least one non-adaptive-tick CPU must remain
 online to handle timekeeping tasks in order to ensure that system
 calls like gettimeofday() returns accurate values on adaptive-tick CPUs.
 (This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
-user processes to observe slight drifts in clock rate.)  Therefore, the
-boot CPU is prohibited from entering adaptive-ticks mode.  Specifying a
-"nohz_full=" mask that includes the boot CPU will result in a boot-time
-error message, and the boot CPU will be removed from the mask.  Note that
-this means that your system must have at least two CPUs in order for
+user processes to observe slight drifts in clock rate.) Note that this
+means that your system must have at least two CPUs in order for
 CONFIG_NO_HZ_FULL=y to do anything for you.
 
 Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
index 03be0d1330a6b22336c91cd4cfeae7fd82838692..c62805dbd6088b3bd63ac07766b92443f35f0e36 100644 (file)
@@ -696,15 +696,21 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
  *
  * XXX could add max_slice to the augmented data to track this.
  */
-static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static s64 entity_lag(u64 avruntime, struct sched_entity *se)
 {
-       s64 lag, limit;
+       s64 vlag, limit;
+
+       vlag = avruntime - se->vruntime;
+       limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
 
+       return clamp(vlag, -limit, limit);
+}
+
+static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
        SCHED_WARN_ON(!se->on_rq);
-       lag = avg_vruntime(cfs_rq) - se->vruntime;
 
-       limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
-       se->vlag = clamp(lag, -limit, limit);
+       se->vlag = entity_lag(avg_vruntime(cfs_rq), se);
 }
 
 /*
@@ -3676,11 +3682,10 @@ static inline void
 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
 #endif
 
-static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
+static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
                           unsigned long weight)
 {
        unsigned long old_weight = se->load.weight;
-       u64 avruntime = avg_vruntime(cfs_rq);
        s64 vlag, vslice;
 
        /*
@@ -3761,7 +3766,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
         *         = V  - vl'
         */
        if (avruntime != se->vruntime) {
-               vlag = (s64)(avruntime - se->vruntime);
+               vlag = entity_lag(avruntime, se);
                vlag = div_s64(vlag * old_weight, weight);
                se->vruntime = avruntime - vlag;
        }
@@ -3787,25 +3792,26 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
                            unsigned long weight)
 {
        bool curr = cfs_rq->curr == se;
+       u64 avruntime;
 
        if (se->on_rq) {
                /* commit outstanding execution time */
-               if (curr)
-                       update_curr(cfs_rq);
-               else
+               update_curr(cfs_rq);
+               avruntime = avg_vruntime(cfs_rq);
+               if (!curr)
                        __dequeue_entity(cfs_rq, se);
                update_load_sub(&cfs_rq->load, se->load.weight);
        }
        dequeue_load_avg(cfs_rq, se);
 
-       if (!se->on_rq) {
+       if (se->on_rq) {
+               reweight_eevdf(se, avruntime, weight);
+       } else {
                /*
                 * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
                 * we need to scale se->vlag when w_i changes.
                 */
                se->vlag = div_s64(se->vlag * se->load.weight, weight);
-       } else {
-               reweight_eevdf(cfs_rq, se, weight);
        }
 
        update_load_set(&se->load, weight);
index 373d42c707bc5d65d70c18d66e0ffd1621d33f5b..5891e715f00d028b0d2f4fd157e99f319b899a1d 100644 (file)
@@ -46,7 +46,16 @@ int housekeeping_any_cpu(enum hk_type type)
                        if (cpu < nr_cpu_ids)
                                return cpu;
 
-                       return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
+                       cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
+                       if (likely(cpu < nr_cpu_ids))
+                               return cpu;
+                       /*
+                        * Unless we have another problem this can only happen
+                        * at boot time before start_secondary() brings the 1st
+                        * housekeeping CPU up.
+                        */
+                       WARN_ON_ONCE(system_state == SYSTEM_RUNNING ||
+                                    type != HK_TYPE_TIMER);
                }
        }
        return smp_processor_id();
@@ -109,6 +118,7 @@ static void __init housekeeping_setup_type(enum hk_type type,
 static int __init housekeeping_setup(char *str, unsigned long flags)
 {
        cpumask_var_t non_housekeeping_mask, housekeeping_staging;
+       unsigned int first_cpu;
        int err = 0;
 
        if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
@@ -129,7 +139,8 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
        cpumask_andnot(housekeeping_staging,
                       cpu_possible_mask, non_housekeeping_mask);
 
-       if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) {
+       first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging);
+       if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) {
                __cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
                __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
                if (!housekeeping.flags) {
@@ -138,6 +149,9 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
                }
        }
 
+       if (cpumask_empty(non_housekeeping_mask))
+               goto free_housekeeping_staging;
+
        if (!housekeeping.flags) {
                /* First setup call ("nohz_full=" or "isolcpus=") */
                enum hk_type type;