Commit | Line | Data |
---|---|---|
36404b09 JA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <linux/kernel.h> | |
3 | #include <linux/errno.h> | |
4 | #include <linux/file.h> | |
5 | #include <linux/slab.h> | |
e6130eba | 6 | #include <linux/nospec.h> |
36404b09 JA |
7 | #include <linux/io_uring.h> |
8 | ||
9 | #include <uapi/linux/io_uring.h> | |
10 | ||
36404b09 | 11 | #include "io_uring.h" |
e6130eba JA |
12 | #include "rsrc.h" |
13 | #include "filetable.h" | |
36404b09 JA |
14 | #include "msg_ring.h" |
15 | ||
cbeb47a7 BL |
16 | |
17 | /* All valid masks for MSG_RING */ | |
18 | #define IORING_MSG_RING_MASK (IORING_MSG_RING_CQE_SKIP | \ | |
19 | IORING_MSG_RING_FLAGS_PASS) | |
20 | ||
36404b09 JA |
/*
 * Per-request state for IORING_OP_MSG_RING. Overlaid on the request's
 * command area via io_kiocb_to_cmd().
 */
struct io_msg {
	struct file			*file;		/* target ring file (req->file) */
	struct file			*src_file;	/* grabbed source fixed file; owned until installed or cleaned up */
	struct callback_head		tw;		/* task_work used to punt to the target's submitter task */
	u64 user_data;		/* user_data to post in the target CQE */
	u32 len;		/* res value to post in the target CQE */
	u32 cmd;		/* IORING_MSG_DATA or IORING_MSG_SEND_FD */
	u32 src_fd;		/* source fixed-file table index (SEND_FD) */
	union {
		u32 dst_fd;	/* destination fixed-file slot (SEND_FD) */
		u32 cqe_flags;	/* CQE flags to pass through (FLAGS_PASS) */
	};
	u32 flags;		/* IORING_MSG_RING_* flags from the SQE */
};
35 | ||
423d5081 JA |
/* Drop the target ring's uring_lock taken via io_double_lock_ctx() */
static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}
40 | ||
41 | static int io_double_lock_ctx(struct io_ring_ctx *octx, | |
42 | unsigned int issue_flags) | |
43 | { | |
44 | /* | |
45 | * To ensure proper ordering between the two ctxs, we can only | |
46 | * attempt a trylock on the target. If that fails and we already have | |
47 | * the source ctx lock, punt to io-wq. | |
48 | */ | |
49 | if (!(issue_flags & IO_URING_F_UNLOCKED)) { | |
50 | if (!mutex_trylock(&octx->uring_lock)) | |
51 | return -EAGAIN; | |
52 | return 0; | |
53 | } | |
54 | mutex_lock(&octx->uring_lock); | |
55 | return 0; | |
56 | } | |
57 | ||
11373026 PB |
58 | void io_msg_ring_cleanup(struct io_kiocb *req) |
59 | { | |
60 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); | |
61 | ||
62 | if (WARN_ON_ONCE(!msg->src_file)) | |
63 | return; | |
64 | ||
65 | fput(msg->src_file); | |
66 | msg->src_file = NULL; | |
67 | } | |
68 | ||
56d8e318 PB |
69 | static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx) |
70 | { | |
71 | if (!target_ctx->task_complete) | |
72 | return false; | |
73 | return current != target_ctx->submitter_task; | |
74 | } | |
75 | ||
76 | static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func) | |
77 | { | |
78 | struct io_ring_ctx *ctx = req->file->private_data; | |
79 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); | |
8579538c PB |
80 | struct task_struct *task = READ_ONCE(ctx->submitter_task); |
81 | ||
82 | if (unlikely(!task)) | |
83 | return -EOWNERDEAD; | |
56d8e318 PB |
84 | |
85 | init_task_work(&msg->tw, func); | |
86 | if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL)) | |
87 | return -EOWNERDEAD; | |
88 | ||
89 | return IOU_ISSUE_SKIP_COMPLETE; | |
90 | } | |
91 | ||
6d043ee1 PB |
92 | static void io_msg_tw_complete(struct callback_head *head) |
93 | { | |
94 | struct io_msg *msg = container_of(head, struct io_msg, tw); | |
95 | struct io_kiocb *req = cmd_to_io_kiocb(msg); | |
96 | struct io_ring_ctx *target_ctx = req->file->private_data; | |
97 | int ret = 0; | |
98 | ||
e12d7a46 | 99 | if (current->flags & PF_EXITING) { |
6d043ee1 | 100 | ret = -EOWNERDEAD; |
e12d7a46 JA |
101 | } else { |
102 | /* | |
103 | * If the target ring is using IOPOLL mode, then we need to be | |
104 | * holding the uring_lock for posting completions. Other ring | |
105 | * types rely on the regular completion locking, which is | |
106 | * handled while posting. | |
107 | */ | |
108 | if (target_ctx->flags & IORING_SETUP_IOPOLL) | |
109 | mutex_lock(&target_ctx->uring_lock); | |
110 | if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0)) | |
111 | ret = -EOVERFLOW; | |
112 | if (target_ctx->flags & IORING_SETUP_IOPOLL) | |
113 | mutex_unlock(&target_ctx->uring_lock); | |
114 | } | |
6d043ee1 PB |
115 | |
116 | if (ret < 0) | |
117 | req_set_fail(req); | |
118 | io_req_queue_tw_complete(req, ret); | |
119 | } | |
120 | ||
e12d7a46 | 121 | static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags) |
e6130eba JA |
122 | { |
123 | struct io_ring_ctx *target_ctx = req->file->private_data; | |
f2ccb5ae | 124 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
cbeb47a7 | 125 | u32 flags = 0; |
e12d7a46 | 126 | int ret; |
e6130eba | 127 | |
cbeb47a7 BL |
128 | if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS) |
129 | return -EINVAL; | |
130 | if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd) | |
e6130eba | 131 | return -EINVAL; |
8579538c PB |
132 | if (target_ctx->flags & IORING_SETUP_R_DISABLED) |
133 | return -EBADFD; | |
e6130eba | 134 | |
56d8e318 PB |
135 | if (io_msg_need_remote(target_ctx)) |
136 | return io_msg_exec_remote(req, io_msg_tw_complete); | |
6d043ee1 | 137 | |
cbeb47a7 BL |
138 | if (msg->flags & IORING_MSG_RING_FLAGS_PASS) |
139 | flags = msg->cqe_flags; | |
140 | ||
e12d7a46 JA |
141 | ret = -EOVERFLOW; |
142 | if (target_ctx->flags & IORING_SETUP_IOPOLL) { | |
143 | if (unlikely(io_double_lock_ctx(target_ctx, issue_flags))) | |
144 | return -EAGAIN; | |
cbeb47a7 | 145 | if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags)) |
e12d7a46 JA |
146 | ret = 0; |
147 | io_double_unlock_ctx(target_ctx); | |
148 | } else { | |
cbeb47a7 | 149 | if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags)) |
e12d7a46 JA |
150 | ret = 0; |
151 | } | |
152 | return ret; | |
e6130eba JA |
153 | } |
154 | ||
11373026 PB |
155 | static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags) |
156 | { | |
157 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); | |
158 | struct io_ring_ctx *ctx = req->ctx; | |
159 | struct file *file = NULL; | |
160 | unsigned long file_ptr; | |
161 | int idx = msg->src_fd; | |
162 | ||
163 | io_ring_submit_lock(ctx, issue_flags); | |
164 | if (likely(idx < ctx->nr_user_files)) { | |
165 | idx = array_index_nospec(idx, ctx->nr_user_files); | |
166 | file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr; | |
167 | file = (struct file *) (file_ptr & FFS_MASK); | |
168 | if (file) | |
169 | get_file(file); | |
e6130eba | 170 | } |
11373026 PB |
171 | io_ring_submit_unlock(ctx, issue_flags); |
172 | return file; | |
e6130eba JA |
173 | } |
174 | ||
/*
 * Install the grabbed source file into the target ring's fixed file table
 * and (unless CQE_SKIP is set) post a notification CQE there.
 * On successful install, ownership of the file reference transfers to the
 * target table, so the request's cleanup hook must no longer run.
 */
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	/* Reference now owned by the target's file table: disarm cleanup */
	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}
206 | ||
6d043ee1 PB |
207 | static void io_msg_tw_fd_complete(struct callback_head *head) |
208 | { | |
209 | struct io_msg *msg = container_of(head, struct io_msg, tw); | |
210 | struct io_kiocb *req = cmd_to_io_kiocb(msg); | |
211 | int ret = -EOWNERDEAD; | |
212 | ||
213 | if (!(current->flags & PF_EXITING)) | |
214 | ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED); | |
215 | if (ret < 0) | |
216 | req_set_fail(req); | |
217 | io_req_queue_tw_complete(req, ret); | |
218 | } | |
219 | ||
17211310 PB |
/*
 * IORING_MSG_SEND_FD: copy a fixed file from this ring's table into a slot
 * in the target ring's table. May be invoked multiple times for the same
 * request (e.g. after -EAGAIN), hence the src_file caching below.
 */
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *src_file = msg->src_file;

	/* Sending to our own ring would deadlock on uring_lock */
	if (target_ctx == ctx)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	if (!src_file) {
		src_file = io_msg_grab_file(req, issue_flags);
		if (!src_file)
			return -EBADF;
		/* Remember the grabbed file so cleanup can drop it on failure */
		msg->src_file = src_file;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_fd_complete);
	return io_msg_install_complete(req, issue_flags);
}
243 | ||
36404b09 JA |
244 | int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) |
245 | { | |
f2ccb5ae | 246 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
36404b09 | 247 | |
e6130eba | 248 | if (unlikely(sqe->buf_index || sqe->personality)) |
36404b09 JA |
249 | return -EINVAL; |
250 | ||
11373026 | 251 | msg->src_file = NULL; |
36404b09 JA |
252 | msg->user_data = READ_ONCE(sqe->off); |
253 | msg->len = READ_ONCE(sqe->len); | |
e6130eba JA |
254 | msg->cmd = READ_ONCE(sqe->addr); |
255 | msg->src_fd = READ_ONCE(sqe->addr3); | |
256 | msg->dst_fd = READ_ONCE(sqe->file_index); | |
257 | msg->flags = READ_ONCE(sqe->msg_ring_flags); | |
cbeb47a7 | 258 | if (msg->flags & ~IORING_MSG_RING_MASK) |
e6130eba JA |
259 | return -EINVAL; |
260 | ||
36404b09 JA |
261 | return 0; |
262 | } | |
263 | ||
264 | int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags) | |
265 | { | |
f2ccb5ae | 266 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
36404b09 JA |
267 | int ret; |
268 | ||
269 | ret = -EBADFD; | |
270 | if (!io_is_uring_fops(req->file)) | |
271 | goto done; | |
272 | ||
e6130eba JA |
273 | switch (msg->cmd) { |
274 | case IORING_MSG_DATA: | |
e12d7a46 | 275 | ret = io_msg_ring_data(req, issue_flags); |
e6130eba JA |
276 | break; |
277 | case IORING_MSG_SEND_FD: | |
278 | ret = io_msg_send_fd(req, issue_flags); | |
279 | break; | |
280 | default: | |
281 | ret = -EINVAL; | |
282 | break; | |
283 | } | |
36404b09 JA |
284 | |
285 | done: | |
6d043ee1 PB |
286 | if (ret < 0) { |
287 | if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE) | |
288 | return ret; | |
36404b09 | 289 | req_set_fail(req); |
6d043ee1 | 290 | } |
36404b09 | 291 | io_req_set_res(req, ret, 0); |
36404b09 JA |
292 | return IOU_OK; |
293 | } |