// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "alloc_cache.h"
#include "msg_ring.h"

/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					 IORING_MSG_RING_FLAGS_PASS)

struct io_msg {
	struct file *file;
	struct file *src_file;
	struct callback_head tw;
	u64 user_data;
	u32 len;
	u32 cmd;
	u32 src_fd;
	union {
		u32 dst_fd;
		u32 cqe_flags;
	};
	u32 flags;
};

static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}

static int io_lock_external_ctx(struct io_ring_ctx *octx,
				unsigned int issue_flags)
{
	/*
	 * To ensure proper ordering between the two ctxs, we can only
	 * attempt a trylock on the target. If that fails and we already have
	 * the source ctx lock, punt to io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		if (!mutex_trylock(&octx->uring_lock))
			return -EAGAIN;
		return 0;
	}
	mutex_lock(&octx->uring_lock);
	return 0;
}

void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}

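/*
 * Remote delivery, used when the target ring only allows its submitter task
 * to post completions (ctx->task_complete, e.g. rings created with
 * IORING_SETUP_DEFER_TASKRUN): the sender cannot post the CQE directly.
 * Instead it allocates a request (or reuses one from the target's msg_cache),
 * stamps it as a NOP carrying the message in its CQE fields, and queues it as
 * remote task_work; io_msg_tw_complete() then posts the CQE from the target's
 * own task context and recycles the request.
 */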
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
	return target_ctx->task_complete;
}

static void io_msg_tw_complete(struct io_kiocb *req, io_tw_token_t tw)
{
	struct io_ring_ctx *ctx = req->ctx;

	io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
	if (spin_trylock(&ctx->msg_lock)) {
		if (io_alloc_cache_put(&ctx->msg_cache, req))
			req = NULL;
		spin_unlock(&ctx->msg_lock);
	}
	if (req)
		kmem_cache_free(req_cachep, req);
	percpu_ref_put(&ctx->refs);
}

static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
			      int res, u32 cflags, u64 user_data)
{
	if (!READ_ONCE(ctx->submitter_task)) {
		kmem_cache_free(req_cachep, req);
		return -EOWNERDEAD;
	}
	req->opcode = IORING_OP_NOP;
	req->cqe.user_data = user_data;
	io_req_set_res(req, res, cflags);
	percpu_ref_get(&ctx->refs);
	req->ctx = ctx;
	req->tctx = NULL;
	req->io_task_work.func = io_msg_tw_complete;
	io_req_task_work_add_remote(req, IOU_F_TWQ_LAZY_WAKE);
	return 0;
}

static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
{
	struct io_kiocb *req = NULL;

	if (spin_trylock(&ctx->msg_lock)) {
		req = io_alloc_cache_get(&ctx->msg_cache);
		spin_unlock(&ctx->msg_lock);
		if (req)
			return req;
	}
	return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
}

static int io_msg_data_remote(struct io_ring_ctx *target_ctx,
			      struct io_msg *msg)
{
	struct io_kiocb *target;
	u32 flags = 0;

	target = io_msg_get_kiocb(target_ctx);
	if (unlikely(!target))
		return -ENOMEM;

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	return io_msg_remote_post(target_ctx, target, msg->len, flags,
				  msg->user_data);
}

static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
			      struct io_msg *msg, unsigned int issue_flags)
{
	u32 flags = 0;
	int ret;

	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
		return -EINVAL;
	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;

	if (io_msg_need_remote(target_ctx))
		return io_msg_data_remote(target_ctx, msg);

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	ret = -EOVERFLOW;
	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
		if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
			return -EAGAIN;
	}
	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
		ret = 0;
	if (target_ctx->flags & IORING_SETUP_IOPOLL)
		io_double_unlock_ctx(target_ctx);
	return ret;
}

static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	return __io_msg_ring_data(target_ctx, msg, issue_flags);
}

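/*
 * Illustrative sketch, not part of the original file: how a submitter could
 * describe an IORING_MSG_DATA message in a raw SQE, mirroring the field
 * mapping in __io_msg_ring_prep() further below. The helper name and its
 * parameters are invented for illustration; real userspace would typically
 * use liburing's prep helpers instead. The target ring sees a CQE whose
 * user_data and res come from sqe->off and sqe->len and, with
 * IORING_MSG_RING_FLAGS_PASS, whose flags come from sqe->file_index.
 */
static inline void io_msg_sketch_prep_data(struct io_uring_sqe *sqe,
					   int target_ring_fd, u64 msg_user_data,
					   u32 msg_len, u32 pass_cqe_flags)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_MSG_RING;
	sqe->fd = target_ring_fd;		/* ring to post the CQE to */
	sqe->addr = IORING_MSG_DATA;		/* msg->cmd */
	sqe->off = msg_user_data;		/* target CQE's user_data */
	sqe->len = msg_len;			/* target CQE's res */
	if (pass_cqe_flags) {
		/* dst_fd and cqe_flags share a slot; FLAGS_PASS selects the latter */
		sqe->msg_ring_flags = IORING_MSG_RING_FLAGS_PASS;
		sqe->file_index = pass_cqe_flags;
	}
}
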
static int io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_rsrc_node *node;
	int ret = -EBADF;

	io_ring_submit_lock(ctx, issue_flags);
	node = io_rsrc_node_lookup(&ctx->file_table.data, msg->src_fd);
	if (node) {
		msg->src_file = io_slot_file(node);
		if (msg->src_file)
			get_file(msg->src_file);
		req->flags |= REQ_F_NEED_CLEANUP;
		ret = 0;
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}

static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}

static void io_msg_tw_fd_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	int ret = -EOWNERDEAD;

	if (!(current->flags & PF_EXITING))
		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

static int io_msg_fd_remote(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, io_msg_tw_fd_complete);
	if (task_work_add(task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}

static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;

	if (msg->len)
		return -EINVAL;
	if (target_ctx == ctx)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	if (!msg->src_file) {
		int ret = io_msg_grab_file(req, issue_flags);
		if (unlikely(ret))
			return ret;
	}

	if (io_msg_need_remote(target_ctx))
		return io_msg_fd_remote(req);
	return io_msg_install_complete(req, issue_flags);
}

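/*
 * Illustrative sketch, not part of the original file: describing an
 * IORING_MSG_SEND_FD message in a raw SQE. src_slot indexes the sender's
 * registered (fixed) file table, dst_slot is the slot installed into on the
 * target, and sqe->len must stay zero; the helper name is invented for
 * illustration. Per the comment in io_msg_install_complete() above, a
 * completion of -EOVERFLOW means the file was installed but the target got
 * no CQE, so the sender should follow up with an IORING_MSG_DATA message.
 */
static inline void io_msg_sketch_prep_send_fd(struct io_uring_sqe *sqe,
					      int target_ring_fd, u32 src_slot,
					      u32 dst_slot, u64 msg_user_data)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_MSG_RING;
	sqe->fd = target_ring_fd;	/* ring that receives the file */
	sqe->addr = IORING_MSG_SEND_FD;	/* msg->cmd */
	sqe->addr3 = src_slot;		/* msg->src_fd: fixed-file index at the sender */
	sqe->file_index = dst_slot;	/* msg->dst_fd: slot in the target's table */
	sqe->off = msg_user_data;	/* user_data of the CQE posted to the target */
	/* setting IORING_MSG_RING_CQE_SKIP in sqe->msg_ring_flags suppresses that CQE */
}
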
static int __io_msg_ring_prep(struct io_msg *msg, const struct io_uring_sqe *sqe)
{
	if (unlikely(sqe->buf_index || sqe->personality))
		return -EINVAL;

	msg->src_file = NULL;
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);
	if (msg->flags & ~IORING_MSG_RING_MASK)
		return -EINVAL;

	return 0;
}

int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_msg_ring_prep(io_kiocb_to_cmd(req, struct io_msg), sqe);
}

int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req, issue_flags);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0) {
		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
			return ret;
		req_set_fail(req);
	}
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

int io_uring_sync_msg_ring(struct io_uring_sqe *sqe)
{
	struct io_msg io_msg = { };
	int ret;

	ret = __io_msg_ring_prep(&io_msg, sqe);
	if (unlikely(ret))
		return ret;

	/*
	 * Only data sending supported, not IORING_MSG_SEND_FD as that one
	 * doesn't make sense without a source ring to send files from.
	 */
	if (io_msg.cmd != IORING_MSG_DATA)
		return -EINVAL;

	CLASS(fd, f)(sqe->fd);
	if (fd_empty(f))
		return -EBADF;
	if (!io_is_uring_fops(fd_file(f)))
		return -EBADFD;
	return __io_msg_ring_data(fd_file(f)->private_data,
				  &io_msg, IO_URING_F_UNLOCKED);
}
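
/*
 * Illustrative sketch, not part of the original file: io_uring_sync_msg_ring()
 * posts a data message to a ring synchronously, with no source ring to submit
 * from. The hypothetical caller below only shows the SQE fields the function
 * consumes (fd, addr, off, len), matching __io_msg_ring_prep() and
 * __io_msg_ring_data() above.
 */
static int __maybe_unused io_msg_sketch_sync_send(int target_ring_fd,
						  u64 msg_user_data, u32 msg_len)
{
	struct io_uring_sqe sqe = { };

	sqe.opcode = IORING_OP_MSG_RING;
	sqe.fd = target_ring_fd;	/* must be an io_uring file descriptor */
	sqe.addr = IORING_MSG_DATA;	/* only IORING_MSG_DATA is accepted here */
	sqe.off = msg_user_data;	/* target CQE's user_data */
	sqe.len = msg_len;		/* target CQE's res */
	return io_uring_sync_msg_ring(&sqe);
}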