Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
325ea10c IM |
2 | /* |
3 | * Auto-group scheduling implementation: | |
4 | */ | |
354d7793 | 5 | #include <linux/nospec.h> |
25493e5f SS |
6 | #include "sched.h" |
7 | ||
5091faa4 MG |
8 | unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; |
9 | static struct autogroup autogroup_default; | |
10 | static atomic_t autogroup_seq_nr; | |
11 | ||
029632fb | 12 | void __init autogroup_init(struct task_struct *init_task) |
5091faa4 | 13 | { |
07e06b01 | 14 | autogroup_default.tg = &root_task_group; |
5091faa4 MG |
15 | kref_init(&autogroup_default.kref); |
16 | init_rwsem(&autogroup_default.lock); | |
17 | init_task->signal->autogroup = &autogroup_default; | |
18 | } | |
19 | ||
029632fb | 20 | void autogroup_free(struct task_group *tg) |
5091faa4 MG |
21 | { |
22 | kfree(tg->autogroup); | |
23 | } | |
24 | ||
25 | static inline void autogroup_destroy(struct kref *kref) | |
26 | { | |
27 | struct autogroup *ag = container_of(kref, struct autogroup, kref); | |
28 | ||
f4493771 MG |
29 | #ifdef CONFIG_RT_GROUP_SCHED |
30 | /* We've redirected RT tasks to the root task group... */ | |
31 | ag->tg->rt_se = NULL; | |
32 | ag->tg->rt_rq = NULL; | |
33 | #endif | |
ace783b9 | 34 | sched_offline_group(ag->tg); |
5091faa4 MG |
35 | sched_destroy_group(ag->tg); |
36 | } | |
37 | ||
38 | static inline void autogroup_kref_put(struct autogroup *ag) | |
39 | { | |
40 | kref_put(&ag->kref, autogroup_destroy); | |
41 | } | |
42 | ||
43 | static inline struct autogroup *autogroup_kref_get(struct autogroup *ag) | |
44 | { | |
45 | kref_get(&ag->kref); | |
46 | return ag; | |
47 | } | |
48 | ||
4f821987 MG |
49 | static inline struct autogroup *autogroup_task_get(struct task_struct *p) |
50 | { | |
51 | struct autogroup *ag; | |
52 | unsigned long flags; | |
53 | ||
54 | if (!lock_task_sighand(p, &flags)) | |
55 | return autogroup_kref_get(&autogroup_default); | |
56 | ||
57 | ag = autogroup_kref_get(p->signal->autogroup); | |
58 | unlock_task_sighand(p, &flags); | |
59 | ||
60 | return ag; | |
61 | } | |
62 | ||
5091faa4 MG |
63 | static inline struct autogroup *autogroup_create(void) |
64 | { | |
65 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); | |
66 | struct task_group *tg; | |
67 | ||
68 | if (!ag) | |
69 | goto out_fail; | |
70 | ||
07e06b01 | 71 | tg = sched_create_group(&root_task_group); |
5091faa4 MG |
72 | if (IS_ERR(tg)) |
73 | goto out_free; | |
74 | ||
75 | kref_init(&ag->kref); | |
76 | init_rwsem(&ag->lock); | |
77 | ag->id = atomic_inc_return(&autogroup_seq_nr); | |
78 | ag->tg = tg; | |
f4493771 MG |
79 | #ifdef CONFIG_RT_GROUP_SCHED |
80 | /* | |
81 | * Autogroup RT tasks are redirected to the root task group | |
82 | * so we don't have to move tasks around upon policy change, | |
83 | * or flail around trying to allocate bandwidth on the fly. | |
84 | * A bandwidth exception in __sched_setscheduler() allows | |
1fe89e1b | 85 | * the policy change to proceed. |
f4493771 MG |
86 | */ |
87 | free_rt_sched_group(tg); | |
88 | tg->rt_se = root_task_group.rt_se; | |
89 | tg->rt_rq = root_task_group.rt_rq; | |
90 | #endif | |
5091faa4 MG |
91 | tg->autogroup = ag; |
92 | ||
41261b6a | 93 | sched_online_group(tg, &root_task_group); |
5091faa4 MG |
94 | return ag; |
95 | ||
96 | out_free: | |
97 | kfree(ag); | |
98 | out_fail: | |
99 | if (printk_ratelimit()) { | |
100 | printk(KERN_WARNING "autogroup_create: %s failure.\n", | |
1e58565e | 101 | ag ? "sched_create_group()" : "kzalloc()"); |
5091faa4 MG |
102 | } |
103 | ||
104 | return autogroup_kref_get(&autogroup_default); | |
105 | } | |
106 | ||
029632fb | 107 | bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) |
5091faa4 MG |
108 | { |
109 | if (tg != &root_task_group) | |
110 | return false; | |
5091faa4 | 111 | /* |
18f649ef ON |
112 | * If we race with autogroup_move_group() the caller can use the old |
113 | * value of signal->autogroup but in this case sched_move_task() will | |
114 | * be called again before autogroup_kref_put(). | |
8e5bfa8c ON |
115 | * |
116 | * However, there is no way sched_autogroup_exit_task() could tell us | |
117 | * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case. | |
5091faa4 | 118 | */ |
8e5bfa8c ON |
119 | if (p->flags & PF_EXITING) |
120 | return false; | |
121 | ||
5091faa4 MG |
122 | return true; |
123 | } | |
124 | ||
/*
 * Exit-path hook: re-evaluate the scheduling group of the exiting task @p.
 *
 * exit_notify() is about to run, after which autogroup_move_group() can no
 * longer see this thread, so signal->autogroup must not be used for it any
 * more. task_wants_autogroup() recognises this case via its PF_EXITING
 * check.
 */
void sched_autogroup_exit_task(struct task_struct *p)
{
	sched_move_task(p);
}
134 | ||
5091faa4 MG |
135 | static void |
136 | autogroup_move_group(struct task_struct *p, struct autogroup *ag) | |
137 | { | |
138 | struct autogroup *prev; | |
139 | struct task_struct *t; | |
140 | unsigned long flags; | |
141 | ||
142 | BUG_ON(!lock_task_sighand(p, &flags)); | |
143 | ||
144 | prev = p->signal->autogroup; | |
145 | if (prev == ag) { | |
146 | unlock_task_sighand(p, &flags); | |
147 | return; | |
148 | } | |
149 | ||
150 | p->signal->autogroup = autogroup_kref_get(ag); | |
18f649ef ON |
151 | /* |
152 | * We can't avoid sched_move_task() after we changed signal->autogroup, | |
153 | * this process can already run with task_group() == prev->tg or we can | |
154 | * race with cgroup code which can read autogroup = prev under rq->lock. | |
155 | * In the latter case for_each_thread() can not miss a migrating thread, | |
156 | * cpu_cgroup_attach() must not be possible after cgroup_exit() and it | |
157 | * can't be removed from thread list, we hold ->siglock. | |
8e5bfa8c ON |
158 | * |
159 | * If an exiting thread was already removed from thread list we rely on | |
160 | * sched_autogroup_exit_task(). | |
18f649ef | 161 | */ |
5aface53 | 162 | for_each_thread(p, t) |
5091faa4 | 163 | sched_move_task(t); |
18f649ef | 164 | |
5091faa4 MG |
165 | unlock_task_sighand(p, &flags); |
166 | autogroup_kref_put(prev); | |
167 | } | |
168 | ||
/*
 * Create a fresh autogroup and move the process of @p into it.
 *
 * Allocates with GFP_KERNEL, so this cannot be called under any spinlock.
 */
void sched_autogroup_create_attach(struct task_struct *p)
{
	struct autogroup *fresh;

	fresh = autogroup_create();
	autogroup_move_group(p, fresh);

	/* Release the extra reference that autogroup_create() handed us: */
	autogroup_kref_put(fresh);
}
EXPORT_SYMBOL(sched_autogroup_create_attach);
180 | ||
97fb7a0a | 181 | /* Cannot be called under siglock. Currently has no users: */ |
5091faa4 MG |
182 | void sched_autogroup_detach(struct task_struct *p) |
183 | { | |
184 | autogroup_move_group(p, &autogroup_default); | |
185 | } | |
186 | EXPORT_SYMBOL(sched_autogroup_detach); | |
187 | ||
188 | void sched_autogroup_fork(struct signal_struct *sig) | |
189 | { | |
4f821987 | 190 | sig->autogroup = autogroup_task_get(current); |
5091faa4 MG |
191 | } |
192 | ||
193 | void sched_autogroup_exit(struct signal_struct *sig) | |
194 | { | |
195 | autogroup_kref_put(sig->autogroup); | |
196 | } | |
197 | ||
198 | static int __init setup_autogroup(char *str) | |
199 | { | |
200 | sysctl_sched_autogroup_enabled = 0; | |
201 | ||
202 | return 1; | |
203 | } | |
5091faa4 MG |
204 | __setup("noautogroup", setup_autogroup); |
205 | ||
c1ad41f1 IM |
206 | #ifdef CONFIG_PROC_FS |
207 | ||
208 | int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) | |
209 | { | |
210 | static unsigned long next = INITIAL_JIFFIES; | |
211 | struct autogroup *ag; | |
83929cce | 212 | unsigned long shares; |
354d7793 | 213 | int err, idx; |
c1ad41f1 | 214 | |
75e45d51 | 215 | if (nice < MIN_NICE || nice > MAX_NICE) |
c1ad41f1 IM |
216 | return -EINVAL; |
217 | ||
218 | err = security_task_setnice(current, nice); | |
219 | if (err) | |
220 | return err; | |
221 | ||
222 | if (nice < 0 && !can_nice(current, nice)) | |
223 | return -EPERM; | |
224 | ||
97fb7a0a | 225 | /* This is a heavy operation, taking global locks.. */ |
c1ad41f1 IM |
226 | if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next)) |
227 | return -EAGAIN; | |
228 | ||
229 | next = HZ / 10 + jiffies; | |
230 | ag = autogroup_task_get(p); | |
354d7793 PZ |
231 | |
232 | idx = array_index_nospec(nice + 20, 40); | |
233 | shares = scale_load(sched_prio_to_weight[idx]); | |
c1ad41f1 IM |
234 | |
235 | down_write(&ag->lock); | |
83929cce | 236 | err = sched_group_set_shares(ag->tg, shares); |
c1ad41f1 IM |
237 | if (!err) |
238 | ag->nice = nice; | |
239 | up_write(&ag->lock); | |
240 | ||
241 | autogroup_kref_put(ag); | |
242 | ||
243 | return err; | |
244 | } | |
245 | ||
246 | void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) | |
247 | { | |
248 | struct autogroup *ag = autogroup_task_get(p); | |
249 | ||
250 | if (!task_group_is_autogroup(ag->tg)) | |
251 | goto out; | |
252 | ||
253 | down_read(&ag->lock); | |
254 | seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); | |
255 | up_read(&ag->lock); | |
256 | ||
257 | out: | |
258 | autogroup_kref_put(ag); | |
259 | } | |
260 | #endif /* CONFIG_PROC_FS */ | |
261 | ||
5091faa4 | 262 | #ifdef CONFIG_SCHED_DEBUG |
029632fb | 263 | int autogroup_path(struct task_group *tg, char *buf, int buflen) |
5091faa4 | 264 | { |
511f67a5 | 265 | if (!task_group_is_autogroup(tg)) |
8ecedd7a BR |
266 | return 0; |
267 | ||
5091faa4 MG |
268 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); |
269 | } | |
97fb7a0a | 270 | #endif |