Merge tag 'tomoyo-pr-20210628' of git://git.osdn.net/gitroot/tomoyo/tomoyo-test1

[linux-block.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 7e5946698711244bc115f0ad1c2e7716bcc47414..cf16f8fda9a6b3805a73c4a00231f82407e3afb5 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1403,8 +1403,10 @@ static void uclamp_sync_util_min_rt_default(void)
  static inline struct uclamp_se
  uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
  {
+       /* Copy by value as we could modify it */
         struct uclamp_se uc_req = p->uclamp_req[clamp_id];
  #ifdef CONFIG_UCLAMP_TASK_GROUP
+       unsigned int tg_min, tg_max, value;
  
         /*
          * Tasks in autogroups or root task group will be
@@ -1415,23 +1417,11 @@ uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
         if (task_group(p) == &root_task_group)
                 return uc_req;
  
-       switch (clamp_id) {
-       case UCLAMP_MIN: {
-               struct uclamp_se uc_min = task_group(p)->uclamp[clamp_id];
-               if (uc_req.value < uc_min.value)
-                       return uc_min;
-               break;
-       }
-       case UCLAMP_MAX: {
-               struct uclamp_se uc_max = task_group(p)->uclamp[clamp_id];
-               if (uc_req.value > uc_max.value)
-                       return uc_max;
-               break;
-       }
-       default:
-               WARN_ON_ONCE(1);
-               break;
-       }
+       tg_min = task_group(p)->uclamp[UCLAMP_MIN].value;
+       tg_max = task_group(p)->uclamp[UCLAMP_MAX].value;
+       value = uc_req.value;
+       value = clamp(value, tg_min, tg_max);
+       uclamp_se_set(&uc_req, value, false);
  #endif
  
         return uc_req;
@@ -1630,8 +1620,9 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
  }
  
  static inline void
-uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
+uclamp_update_active(struct task_struct *p)
  {
+       enum uclamp_id clamp_id;
         struct rq_flags rf;
         struct rq *rq;
  
@@ -1651,9 +1642,11 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
          * affecting a valid clamp bucket, the next time it's enqueued,
          * it will already see the updated clamp bucket value.
          */
-       if (p->uclamp[clamp_id].active) {
-               uclamp_rq_dec_id(rq, p, clamp_id);
-               uclamp_rq_inc_id(rq, p, clamp_id);
+       for_each_clamp_id(clamp_id) {
+               if (p->uclamp[clamp_id].active) {
+                       uclamp_rq_dec_id(rq, p, clamp_id);
+                       uclamp_rq_inc_id(rq, p, clamp_id);
+               }
         }
  
         task_rq_unlock(rq, p, &rf);
@@ -1661,20 +1654,14 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
  
  #ifdef CONFIG_UCLAMP_TASK_GROUP
  static inline void
-uclamp_update_active_tasks(struct cgroup_subsys_state *css,
-                          unsigned int clamps)
+uclamp_update_active_tasks(struct cgroup_subsys_state *css)
  {
-       enum uclamp_id clamp_id;
         struct css_task_iter it;
         struct task_struct *p;
  
         css_task_iter_start(css, 0, &it);
-       while ((p = css_task_iter_next(&it))) {
-               for_each_clamp_id(clamp_id) {
-                       if ((0x1 << clamp_id) & clamps)
-                               uclamp_update_active(p, clamp_id);
-               }
-       }
+       while ((p = css_task_iter_next(&it)))
+               uclamp_update_active(p);
         css_task_iter_end(&it);
  }
  
@@ -1941,6 +1928,11 @@ static inline void uclamp_post_fork(struct task_struct *p) { }
  static inline void init_uclamp(void) { }
  #endif /* CONFIG_UCLAMP_TASK */
  
+bool sched_task_on_rq(struct task_struct *p)
+{
+       return task_on_rq_queued(p);
+}
+
  static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
  {
         if (!(flags & ENQUEUE_NOCLOCK))
@@ -2638,7 +2630,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
                 return -EINVAL;
         }
  
-       if (task_running(rq, p) || p->state == TASK_WAKING) {
+       if (task_running(rq, p) || READ_ONCE(p->__state) == TASK_WAKING) {
                 /*
                  * MIGRATE_ENABLE gets here because 'p == current', but for
                  * anything else we cannot do is_migration_disabled(), punt
@@ -2781,19 +2773,20 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
  void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
  {
  #ifdef CONFIG_SCHED_DEBUG
+       unsigned int state = READ_ONCE(p->__state);
+
         /*
          * We should never call set_task_cpu() on a blocked task,
          * ttwu() will sort out the placement.
          */
-       WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
-                       !p->on_rq);
+       WARN_ON_ONCE(state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq);
  
         /*
          * Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING,
          * because schedstat_wait_{start,end} rebase migrating task's wait_start
          * time relying on p->on_rq.
          */
-       WARN_ON_ONCE(p->state == TASK_RUNNING &&
+       WARN_ON_ONCE(state == TASK_RUNNING &&
                      p->sched_class == &fair_sched_class &&
                      (p->on_rq && !task_on_rq_migrating(p)));
  
@@ -2965,7 +2958,7 @@ out:
   * smp_call_function() if an IPI is sent by the same process we are
   * waiting to become inactive.
   */
-unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
  {
         int running, queued;
         struct rq_flags rf;
@@ -2993,7 +2986,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                  * is actually now running somewhere else!
                  */
                 while (task_running(rq, p)) {
-                       if (match_state && unlikely(p->state != match_state))
+                       if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
                                 return 0;
                         cpu_relax();
                 }
@@ -3008,7 +3001,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                 running = task_running(rq, p);
                 queued = task_on_rq_queued(p);
                 ncsw = 0;
-               if (!match_state || p->state == match_state)
+               if (!match_state || READ_ONCE(p->__state) == match_state)
                         ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
                 task_rq_unlock(rq, p, &rf);
  
@@ -3317,7 +3310,7 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
                            struct rq_flags *rf)
  {
         check_preempt_curr(rq, p, wake_flags);
-       p->state = TASK_RUNNING;
+       WRITE_ONCE(p->__state, TASK_RUNNING);
         trace_sched_wakeup(p);
  
  #ifdef CONFIG_SMP
@@ -3340,6 +3333,9 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
                 if (rq->avg_idle > max)
                         rq->avg_idle = max;
  
+               rq->wake_stamp = jiffies;
+               rq->wake_avg_idle = rq->avg_idle / 2;
+
                 rq->idle_stamp = 0;
         }
  #endif
@@ -3706,12 +3702,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                  *  - we're serialized against set_special_state() by virtue of
                  *    it disabling IRQs (this allows not taking ->pi_lock).
                  */
-               if (!(p->state & state))
+               if (!(READ_ONCE(p->__state) & state))
                         goto out;
  
                 success = 1;
                 trace_sched_waking(p);
-               p->state = TASK_RUNNING;
+               WRITE_ONCE(p->__state, TASK_RUNNING);
                 trace_sched_wakeup(p);
                 goto out;
         }
@@ -3724,7 +3720,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
          */
         raw_spin_lock_irqsave(&p->pi_lock, flags);
         smp_mb__after_spinlock();
-       if (!(p->state & state))
+       if (!(READ_ONCE(p->__state) & state))
                 goto unlock;
  
         trace_sched_waking(p);
@@ -3790,7 +3786,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
          * TASK_WAKING such that we can unlock p->pi_lock before doing the
          * enqueue, such as ttwu_queue_wakelist().
          */
-       p->state = TASK_WAKING;
+       WRITE_ONCE(p->__state, TASK_WAKING);
  
         /*
          * If the owning (remote) CPU is still in the middle of schedule() with
@@ -3883,7 +3879,7 @@ bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct t
                         ret = func(p, arg);
                 rq_unlock(rq, &rf);
         } else {
-               switch (p->state) {
+               switch (READ_ONCE(p->__state)) {
                 case TASK_RUNNING:
                 case TASK_WAKING:
                         break;
@@ -4009,7 +4005,6 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
  #ifdef CONFIG_SCHEDSTATS
  
  DEFINE_STATIC_KEY_FALSE(sched_schedstats);
-static bool __initdata __sched_schedstats = false;
  
  static void set_schedstats(bool enabled)
  {
@@ -4033,16 +4028,11 @@ static int __init setup_schedstats(char *str)
         if (!str)
                 goto out;
  
-       /*
-        * This code is called before jump labels have been set up, so we can't
-        * change the static branch directly just yet.  Instead set a temporary
-        * variable so init_schedstats() can do it later.
-        */
         if (!strcmp(str, "enable")) {
-               __sched_schedstats = true;
+               set_schedstats(true);
                 ret = 1;
         } else if (!strcmp(str, "disable")) {
-               __sched_schedstats = false;
+               set_schedstats(false);
                 ret = 1;
         }
  out:
@@ -4053,11 +4043,6 @@ out:
  }
  __setup("schedstats=", setup_schedstats);
  
-static void __init init_schedstats(void)
-{
-       set_schedstats(__sched_schedstats);
-}
-
  #ifdef CONFIG_PROC_SYSCTL
  int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
                 size_t *lenp, loff_t *ppos)
@@ -4079,8 +4064,6 @@ int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
         return err;
  }
  #endif /* CONFIG_PROC_SYSCTL */
-#else  /* !CONFIG_SCHEDSTATS */
-static inline void init_schedstats(void) {}
  #endif /* CONFIG_SCHEDSTATS */
  
  /*
@@ -4096,7 +4079,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
          * nobody will actually run it, and a signal or other external
          * event cannot wake it up and insert it on the runqueue either.
          */
-       p->state = TASK_NEW;
+       p->__state = TASK_NEW;
  
         /*
          * Make sure we do not leak PI boosting priority to the child.
@@ -4202,7 +4185,7 @@ void wake_up_new_task(struct task_struct *p)
         struct rq *rq;
  
         raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
-       p->state = TASK_RUNNING;
+       WRITE_ONCE(p->__state, TASK_RUNNING);
  #ifdef CONFIG_SMP
         /*
          * Fork balancing, do it here and not earlier because:
@@ -4564,10 +4547,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
          * running on another CPU and we could rave with its RUNNING -> DEAD
          * transition, resulting in a double drop.
          */
-       prev_state = prev->state;
+       prev_state = READ_ONCE(prev->__state);
         vtime_task_switch(prev);
         perf_event_task_sched_in(prev, current);
         finish_task(prev);
+       tick_nohz_task_switch();
         finish_lock_switch(rq);
         finish_arch_post_lock_switch();
         kcov_finish_switch(current);
@@ -4613,7 +4597,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                 put_task_struct_rcu_user(prev);
         }
  
-       tick_nohz_task_switch();
         return rq;
  }
  
@@ -5258,7 +5241,7 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
  #endif
  
  #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-       if (!preempt && prev->state && prev->non_block_count) {
+       if (!preempt && READ_ONCE(prev->__state) && prev->non_block_count) {
                 printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n",
                         prev->comm, prev->pid, prev->non_block_count);
                 dump_stack();
@@ -5884,10 +5867,10 @@ static void __sched notrace __schedule(bool preempt)
          *  - we form a control dependency vs deactivate_task() below.
          *  - ptrace_{,un}freeze_traced() can change ->state underneath us.
          */
-       prev_state = prev->state;
+       prev_state = READ_ONCE(prev->__state);
         if (!preempt && prev_state) {
                 if (signal_pending_state(prev_state, prev)) {
-                       prev->state = TASK_RUNNING;
+                       WRITE_ONCE(prev->__state, TASK_RUNNING);
                 } else {
                         prev->sched_contributes_to_load =
                                 (prev_state & TASK_UNINTERRUPTIBLE) &&
@@ -5984,7 +5967,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
  {
         unsigned int task_flags;
  
-       if (!tsk->state)
+       if (task_is_running(tsk))
                 return;
  
         task_flags = tsk->flags;
@@ -6059,7 +6042,7 @@ void __sched schedule_idle(void)
          * current task can be in any other state. Note, idle is always in the
          * TASK_RUNNING state.
          */
-       WARN_ON_ONCE(current->state);
+       WARN_ON_ONCE(current->__state);
         do {
                 __schedule(false);
         } while (need_resched());
@@ -7199,7 +7182,6 @@ int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
  {
         return __sched_setscheduler(p, attr, false, true);
  }
-EXPORT_SYMBOL_GPL(sched_setattr_nocheck);
  
  /**
   * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
@@ -7959,7 +7941,7 @@ again:
         if (curr->sched_class != p->sched_class)
                 goto out_unlock;
  
-       if (task_running(p_rq, p) || p->state)
+       if (task_running(p_rq, p) || !task_is_running(p))
                 goto out_unlock;
  
         yielded = curr->sched_class->yield_to_task(rq, p);
@@ -8162,7 +8144,7 @@ void sched_show_task(struct task_struct *p)
  
         pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p));
  
-       if (p->state == TASK_RUNNING)
+       if (task_is_running(p))
                 pr_cont("  running task    ");
  #ifdef CONFIG_DEBUG_STACK_USAGE
         free = stack_not_used(p);
@@ -8186,26 +8168,28 @@ EXPORT_SYMBOL_GPL(sched_show_task);
  static inline bool
  state_filter_match(unsigned long state_filter, struct task_struct *p)
  {
+       unsigned int state = READ_ONCE(p->__state);
+
         /* no filter, everything matches */
         if (!state_filter)
                 return true;
  
         /* filter, but doesn't match */
-       if (!(p->state & state_filter))
+       if (!(state & state_filter))
                 return false;
  
         /*
          * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
          * TASK_KILLABLE).
          */
-       if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE)
+       if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE)
                 return false;
  
         return true;
  }
  
  
-void show_state_filter(unsigned long state_filter)
+void show_state_filter(unsigned int state_filter)
  {
         struct task_struct *g, *p;
  
@@ -8262,7 +8246,7 @@ void __init init_idle(struct task_struct *idle, int cpu)
         raw_spin_lock_irqsave(&idle->pi_lock, flags);
         raw_spin_rq_lock(rq);
  
-       idle->state = TASK_RUNNING;
+       idle->__state = TASK_RUNNING;
         idle->se.exec_start = sched_clock();
         /*
          * PF_KTHREAD should already be set at this point; regardless, make it
@@ -9036,6 +9020,8 @@ void __init sched_init(void)
                 rq->online = 0;
                 rq->idle_stamp = 0;
                 rq->avg_idle = 2*sysctl_sched_migration_cost;
+               rq->wake_stamp = jiffies;
+               rq->wake_avg_idle = rq->avg_idle;
                 rq->max_idle_balance_cost = sysctl_sched_migration_cost;
  
                 INIT_LIST_HEAD(&rq->cfs_tasks);
@@ -9089,8 +9075,6 @@ void __init sched_init(void)
  #endif
         init_sched_fair_class();
  
-       init_schedstats();
-
         psi_init();
  
         init_uclamp();
@@ -9108,15 +9092,15 @@ static inline int preempt_count_equals(int preempt_offset)
  
  void __might_sleep(const char *file, int line, int preempt_offset)
  {
+       unsigned int state = get_current_state();
         /*
          * Blocking primitives will set (and therefore destroy) current->state,
          * since we will exit with TASK_RUNNING make sure we enter with it,
          * otherwise we will destroy state.
          */
-       WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change,
+       WARN_ONCE(state != TASK_RUNNING && current->task_state_change,
                         "do not call blocking ops when !TASK_RUNNING; "
-                       "state=%lx set at [<%p>] %pS\n",
-                       current->state,
+                       "state=%x set at [<%p>] %pS\n", state,
                         (void *)current->task_state_change,
                         (void *)current->task_state_change);
  
@@ -9577,7 +9561,7 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
                  * has happened. This would lead to problems with PELT, due to
                  * move wanting to detach+attach while we're not attached yet.
                  */
-               if (task->state == TASK_NEW)
+               if (READ_ONCE(task->__state) == TASK_NEW)
                         ret = -EINVAL;
                 raw_spin_unlock_irq(&task->pi_lock);
  
@@ -9641,7 +9625,7 @@ static void cpu_util_update_eff(struct cgroup_subsys_state *css)
                 }
  
                 /* Immediately update descendants RUNNABLE tasks */
-               uclamp_update_active_tasks(css, clamps);
+               uclamp_update_active_tasks(css);
         }
  }
  
@@ -9800,7 +9784,8 @@ static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
  
  static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
  
-static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
+static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
+                               u64 burst)
  {
         int i, ret = 0, runtime_enabled, runtime_was_enabled;
         struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
@@ -9830,6 +9815,10 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
         if (quota != RUNTIME_INF && quota > max_cfs_runtime)
                 return -EINVAL;
  
+       if (quota != RUNTIME_INF && (burst > quota ||
+                                    burst + quota > max_cfs_runtime))
+               return -EINVAL;
+
         /*
          * Prevent race between setting of cfs_rq->runtime_enabled and
          * unthrottle_offline_cfs_rqs().
@@ -9851,6 +9840,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
         raw_spin_lock_irq(&cfs_b->lock);
         cfs_b->period = ns_to_ktime(period);
         cfs_b->quota = quota;
+       cfs_b->burst = burst;
  
         __refill_cfs_bandwidth_runtime(cfs_b);
  
@@ -9884,9 +9874,10 @@ out_unlock:
  
  static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
  {
-       u64 quota, period;
+       u64 quota, period, burst;
  
         period = ktime_to_ns(tg->cfs_bandwidth.period);
+       burst = tg->cfs_bandwidth.burst;
         if (cfs_quota_us < 0)
                 quota = RUNTIME_INF;
         else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC)
@@ -9894,7 +9885,7 @@ static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
         else
                 return -EINVAL;
  
-       return tg_set_cfs_bandwidth(tg, period, quota);
+       return tg_set_cfs_bandwidth(tg, period, quota, burst);
  }
  
  static long tg_get_cfs_quota(struct task_group *tg)
@@ -9912,15 +9903,16 @@ static long tg_get_cfs_quota(struct task_group *tg)
  
  static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
  {
-       u64 quota, period;
+       u64 quota, period, burst;
  
         if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC)
                 return -EINVAL;
  
         period = (u64)cfs_period_us * NSEC_PER_USEC;
         quota = tg->cfs_bandwidth.quota;
+       burst = tg->cfs_bandwidth.burst;
  
-       return tg_set_cfs_bandwidth(tg, period, quota);
+       return tg_set_cfs_bandwidth(tg, period, quota, burst);
  }
  
  static long tg_get_cfs_period(struct task_group *tg)
@@ -9933,6 +9925,30 @@ static long tg_get_cfs_period(struct task_group *tg)
         return cfs_period_us;
  }
  
+static int tg_set_cfs_burst(struct task_group *tg, u64 cfs_burst_us)
+{
+       u64 quota, period, burst;
+
+       if (cfs_burst_us > U64_MAX / NSEC_PER_USEC)     /* us -> ns would overflow */
+               return -EINVAL;
+
+       burst = cfs_burst_us * NSEC_PER_USEC;           /* stored and passed on in ns */
+       period = ktime_to_ns(tg->cfs_bandwidth.period);
+       quota = tg->cfs_bandwidth.quota;
+
+       return tg_set_cfs_bandwidth(tg, period, quota, burst);
+}
+
+static long tg_get_cfs_burst(struct task_group *tg)
+{
+       u64 burst_us;
+
+       burst_us = tg->cfs_bandwidth.burst;
+       do_div(burst_us, NSEC_PER_USEC);
+
+       return burst_us;
+}
+
  static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css,
                                   struct cftype *cft)
  {
@@ -9957,6 +9973,18 @@ static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
         return tg_set_cfs_period(css_tg(css), cfs_period_us);
  }
  
+static u64 cpu_cfs_burst_read_u64(struct cgroup_subsys_state *css,
+                                 struct cftype *cft)
+{
+       return tg_get_cfs_burst(css_tg(css));
+}
+
+static int cpu_cfs_burst_write_u64(struct cgroup_subsys_state *css,
+                                  struct cftype *cftype, u64 cfs_burst_us)
+{
+       return tg_set_cfs_burst(css_tg(css), cfs_burst_us);
+}
+
  struct cfs_schedulable_data {
         struct task_group *tg;
         u64 period, quota;
@@ -10109,6 +10137,11 @@ static struct cftype cpu_legacy_files[] = {
                 .read_u64 = cpu_cfs_period_read_u64,
                 .write_u64 = cpu_cfs_period_write_u64,
         },
+       {
+               .name = "cfs_burst_us",
+               .read_u64 = cpu_cfs_burst_read_u64,
+               .write_u64 = cpu_cfs_burst_write_u64,
+       },
         {
                 .name = "stat",
                 .seq_show = cpu_cfs_stat_show,
@@ -10274,12 +10307,13 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
  {
         struct task_group *tg = css_tg(of_css(of));
         u64 period = tg_get_cfs_period(tg);
+       u64 burst = tg->cfs_bandwidth.burst;    /* keep ns; tg_get_cfs_burst() returns us */
         u64 quota;
         int ret;
  
         ret = cpu_period_quota_parse(buf, &period, &quota);
         if (!ret)
-               ret = tg_set_cfs_bandwidth(tg, period, quota);
+               ret = tg_set_cfs_bandwidth(tg, period, quota, burst);
         return ret ?: nbytes;
  }
  #endif
@@ -10306,6 +10340,12 @@ static struct cftype cpu_files[] = {
                 .seq_show = cpu_max_show,
                 .write = cpu_max_write,
         },
+       {
+               .name = "max.burst",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .read_u64 = cpu_cfs_burst_read_u64,
+               .write_u64 = cpu_cfs_burst_write_u64,
+       },
  #endif
  #ifdef CONFIG_UCLAMP_TASK_GROUP
         {