Commit | Line | Data |
---|---|---|
3dcf60bc | 1 | // SPDX-License-Identifier: GPL-2.0 |
22e2c507 JA |
2 | /* |
3 | * fs/ioprio.c | |
4 | * | |
0fe23479 | 5 | * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk> |
22e2c507 JA |
6 | * |
7 | * Helper functions for setting/querying io priorities of processes. The | |
8 | * system calls closely mimic getpriority/setpriority, see the man page for | |
9 | * those. The prio argument is a composite of prio class and prio data, where | |
10 | * the data argument has meaning within that class. The standard scheduling | |
11 | * classes have 8 distinct prio levels, with 0 being the highest prio and 7 | |
12 | * being the lowest. | |
13 | * | |
14 | * IOW, setting BE scheduling class with prio 2 is done ala: | |
15 | * | |
16 | * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2; | |
17 | * | |
18 | * ioprio_set(IOPRIO_WHO_PROCESS, pid, prio); | |
19 | * | |
898bd37a | 20 | * See also Documentation/block/ioprio.rst |
22e2c507 JA |
21 | * |
22 | */ | |
5a0e3ad6 | 23 | #include <linux/gfp.h> |
22e2c507 JA |
24 | #include <linux/kernel.h> |
25 | #include <linux/ioprio.h> | |
5b825c3a | 26 | #include <linux/cred.h> |
22e2c507 | 27 | #include <linux/blkdev.h> |
16f7e0fe | 28 | #include <linux/capability.h> |
9abdc4cd | 29 | #include <linux/syscalls.h> |
03e68060 | 30 | #include <linux/security.h> |
b488893a | 31 | #include <linux/pid_namespace.h> |
22e2c507 | 32 | |
aa434577 | 33 | int ioprio_check_cap(int ioprio) |
22e2c507 JA |
34 | { |
35 | int class = IOPRIO_PRIO_CLASS(ioprio); | |
36 | int data = IOPRIO_PRIO_DATA(ioprio); | |
22e2c507 JA |
37 | |
38 | switch (class) { | |
39 | case IOPRIO_CLASS_RT: | |
94c4b4fd AD |
40 | /* |
41 | * Originally this only checked for CAP_SYS_ADMIN, | |
42 | * which was implicitly allowed for pid 0 by security | |
43 | * modules such as SELinux. Make sure we check | |
44 | * CAP_SYS_ADMIN first to avoid a denial/avc for | |
45 | * possibly missing CAP_SYS_NICE permission. | |
46 | */ | |
47 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) | |
22e2c507 | 48 | return -EPERM; |
df561f66 | 49 | fallthrough; |
e29387eb | 50 | /* rt has prio field too */ |
22e2c507 | 51 | case IOPRIO_CLASS_BE: |
202bc942 | 52 | if (data >= IOPRIO_NR_LEVELS || data < 0) |
22e2c507 | 53 | return -EINVAL; |
22e2c507 JA |
54 | break; |
55 | case IOPRIO_CLASS_IDLE: | |
56 | break; | |
8ec680e4 JA |
57 | case IOPRIO_CLASS_NONE: |
58 | if (data) | |
59 | return -EINVAL; | |
60 | break; | |
22e2c507 JA |
61 | default: |
62 | return -EINVAL; | |
63 | } | |
64 | ||
aa434577 AM |
65 | return 0; |
66 | } | |
67 | ||
68 | SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) | |
69 | { | |
70 | struct task_struct *p, *g; | |
71 | struct user_struct *user; | |
72 | struct pid *pgrp; | |
73 | kuid_t uid; | |
74 | int ret; | |
75 | ||
76 | ret = ioprio_check_cap(ioprio); | |
77 | if (ret) | |
78 | return ret; | |
79 | ||
22e2c507 | 80 | ret = -ESRCH; |
d69b78ba | 81 | rcu_read_lock(); |
22e2c507 JA |
82 | switch (which) { |
83 | case IOPRIO_WHO_PROCESS: | |
84 | if (!who) | |
85 | p = current; | |
86 | else | |
228ebcbe | 87 | p = find_task_by_vpid(who); |
22e2c507 JA |
88 | if (p) |
89 | ret = set_task_ioprio(p, ioprio); | |
90 | break; | |
91 | case IOPRIO_WHO_PGRP: | |
92 | if (!who) | |
41487c65 EB |
93 | pgrp = task_pgrp(current); |
94 | else | |
b488893a | 95 | pgrp = find_vpid(who); |
40c7fd3f PZ |
96 | |
97 | read_lock(&tasklist_lock); | |
2d70b68d | 98 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
22e2c507 | 99 | ret = set_task_ioprio(p, ioprio); |
40c7fd3f PZ |
100 | if (ret) { |
101 | read_unlock(&tasklist_lock); | |
102 | goto out; | |
103 | } | |
2d70b68d | 104 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
40c7fd3f PZ |
105 | read_unlock(&tasklist_lock); |
106 | ||
22e2c507 JA |
107 | break; |
108 | case IOPRIO_WHO_USER: | |
7b44ab97 EB |
109 | uid = make_kuid(current_user_ns(), who); |
110 | if (!uid_valid(uid)) | |
111 | break; | |
22e2c507 | 112 | if (!who) |
86a264ab | 113 | user = current_user(); |
22e2c507 | 114 | else |
7b44ab97 | 115 | user = find_user(uid); |
22e2c507 JA |
116 | |
117 | if (!user) | |
118 | break; | |
119 | ||
612dafab | 120 | for_each_process_thread(g, p) { |
8639b461 BS |
121 | if (!uid_eq(task_uid(p), uid) || |
122 | !task_pid_vnr(p)) | |
22e2c507 JA |
123 | continue; |
124 | ret = set_task_ioprio(p, ioprio); | |
125 | if (ret) | |
78bd4d48 | 126 | goto free_uid; |
612dafab | 127 | } |
78bd4d48 | 128 | free_uid: |
22e2c507 JA |
129 | if (who) |
130 | free_uid(user); | |
131 | break; | |
132 | default: | |
133 | ret = -EINVAL; | |
134 | } | |
135 | ||
40c7fd3f | 136 | out: |
d69b78ba | 137 | rcu_read_unlock(); |
22e2c507 JA |
138 | return ret; |
139 | } | |
140 | ||
893e5d32 JK |
141 | /* |
142 | * If the task has set an I/O priority, use that. Otherwise, return | |
143 | * the default I/O priority. | |
144 | * | |
145 | * Expected to be called for current task or with task_lock() held to keep | |
146 | * io_context stable. | |
147 | */ | |
148 | int __get_task_ioprio(struct task_struct *p) | |
149 | { | |
150 | struct io_context *ioc = p->io_context; | |
151 | int prio; | |
152 | ||
153 | if (p != current) | |
154 | lockdep_assert_held(&p->alloc_lock); | |
155 | if (ioc) | |
156 | prio = ioc->ioprio; | |
157 | else | |
158 | prio = IOPRIO_DEFAULT; | |
159 | ||
160 | if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) | |
161 | prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p), | |
162 | task_nice_ioprio(p)); | |
163 | return prio; | |
164 | } | |
165 | EXPORT_SYMBOL_GPL(__get_task_ioprio); | |
166 | ||
a1836a42 DQ |
/*
 * Return the effective I/O priority of @p as computed by
 * __get_task_ioprio(), or the non-zero value returned by
 * security_task_getioprio() when that check does not pass.
 */
static int get_task_ioprio(struct task_struct *p)
{
	int prio;
	int err = security_task_getioprio(p);

	if (err)
		return err;

	/* __get_task_ioprio() wants task_lock() held for non-current tasks. */
	task_lock(p);
	prio = __get_task_ioprio(p);
	task_unlock(p);

	return prio;
}
180 | ||
181 | /* | |
182 | * Return raw IO priority value as set by userspace. We use this for | |
183 | * ioprio_get(pid, IOPRIO_WHO_PROCESS) so that we keep historical behavior and | |
184 | * also so that userspace can distinguish unset IO priority (which just gets | |
185 | * overriden based on task's nice value) from IO priority set to some value. | |
186 | */ | |
187 | static int get_task_raw_ioprio(struct task_struct *p) | |
188 | { | |
189 | int ret; | |
190 | ||
191 | ret = security_task_getioprio(p); | |
192 | if (ret) | |
193 | goto out; | |
8ba86821 | 194 | task_lock(p); |
fd0928df JA |
195 | if (p->io_context) |
196 | ret = p->io_context->ioprio; | |
4b838d9e JK |
197 | else |
198 | ret = IOPRIO_DEFAULT; | |
8ba86821 | 199 | task_unlock(p); |
a1836a42 DQ |
200 | out: |
201 | return ret; | |
202 | } | |
203 | ||
/*
 * Pick the better of two I/O priority values, which is the numerically
 * smaller one.
 */
static int ioprio_best(unsigned short aprio, unsigned short bprio)
{
	const unsigned short best = min(aprio, bprio);

	return best;
}
208 | ||
938bb9f5 | 209 | SYSCALL_DEFINE2(ioprio_get, int, which, int, who) |
22e2c507 JA |
210 | { |
211 | struct task_struct *g, *p; | |
212 | struct user_struct *user; | |
41487c65 | 213 | struct pid *pgrp; |
7b44ab97 | 214 | kuid_t uid; |
22e2c507 | 215 | int ret = -ESRCH; |
a1836a42 | 216 | int tmpio; |
22e2c507 | 217 | |
d69b78ba | 218 | rcu_read_lock(); |
22e2c507 JA |
219 | switch (which) { |
220 | case IOPRIO_WHO_PROCESS: | |
221 | if (!who) | |
222 | p = current; | |
223 | else | |
228ebcbe | 224 | p = find_task_by_vpid(who); |
22e2c507 | 225 | if (p) |
4b838d9e | 226 | ret = get_task_raw_ioprio(p); |
22e2c507 JA |
227 | break; |
228 | case IOPRIO_WHO_PGRP: | |
229 | if (!who) | |
41487c65 EB |
230 | pgrp = task_pgrp(current); |
231 | else | |
b488893a | 232 | pgrp = find_vpid(who); |
e6a59aac | 233 | read_lock(&tasklist_lock); |
2d70b68d | 234 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
a1836a42 DQ |
235 | tmpio = get_task_ioprio(p); |
236 | if (tmpio < 0) | |
237 | continue; | |
22e2c507 | 238 | if (ret == -ESRCH) |
a1836a42 | 239 | ret = tmpio; |
22e2c507 | 240 | else |
a1836a42 | 241 | ret = ioprio_best(ret, tmpio); |
2d70b68d | 242 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
e6a59aac DB |
243 | read_unlock(&tasklist_lock); |
244 | ||
22e2c507 JA |
245 | break; |
246 | case IOPRIO_WHO_USER: | |
7b44ab97 | 247 | uid = make_kuid(current_user_ns(), who); |
22e2c507 | 248 | if (!who) |
86a264ab | 249 | user = current_user(); |
22e2c507 | 250 | else |
7b44ab97 | 251 | user = find_user(uid); |
22e2c507 JA |
252 | |
253 | if (!user) | |
254 | break; | |
255 | ||
612dafab | 256 | for_each_process_thread(g, p) { |
8639b461 BS |
257 | if (!uid_eq(task_uid(p), user->uid) || |
258 | !task_pid_vnr(p)) | |
22e2c507 | 259 | continue; |
a1836a42 DQ |
260 | tmpio = get_task_ioprio(p); |
261 | if (tmpio < 0) | |
262 | continue; | |
22e2c507 | 263 | if (ret == -ESRCH) |
a1836a42 | 264 | ret = tmpio; |
22e2c507 | 265 | else |
a1836a42 | 266 | ret = ioprio_best(ret, tmpio); |
612dafab | 267 | } |
22e2c507 JA |
268 | |
269 | if (who) | |
270 | free_uid(user); | |
271 | break; | |
272 | default: | |
273 | ret = -EINVAL; | |
274 | } | |
275 | ||
d69b78ba | 276 | rcu_read_unlock(); |
22e2c507 JA |
277 | return ret; |
278 | } |