io_uring: account locked pages for non-fixed zc
[linux-block.git] / io_uring / net.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	int msg_flags;
	size_t len;
	size_t done_io;
	unsigned int flags;
};

struct io_sendzc {
	struct file *file;
	void __user *buf;
	size_t len;
	u16 slot_idx;
	unsigned msg_flags;
	unsigned flags;
};

#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
	struct socket *sock;
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!hdr || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

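/*
 * Grab a cached async msghdr when the ring is locked (issue_flags does not
 * carry IO_URING_F_UNLOCKED); otherwise fall back to a plain allocation.
 */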
static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
						      unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;

	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
		struct io_async_msghdr *hdr;

		hdr = container_of(entry, struct io_async_msghdr, cache);
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req))
		return req->async_data;

	return NULL;
}

static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg = req->async_data;

	if (async_msg)
		return -EAGAIN;
	async_msg = io_recvmsg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	async_msg->msg.msg_name = &async_msg->addr;
	/* if we were using fast_iov, set it to the new one */
	if (!async_msg->free_iov)
		async_msg->msg.msg_iter.iov = async_msg->fast_iov;

	return -EAGAIN;
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->free_iov = iomsg->fast_iov;
	return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
				   &iomsg->free_iov);
}

int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
{
	int hdr;

	if (iomsg->namelen < 0)
		return true;
	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
			       iomsg->namelen, &hdr))
		return true;
	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
		return true;

	return false;
}

static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		if (ret > 0)
			ret = 0;
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg.msg_iovlen == 0) {
			sr->len = 0;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, (struct iovec __user *)uiov, msg.msg_iovlen,
				     UIO_FASTIOV, &iomsg->free_iov,
				     &iomsg->msg.msg_iter, true);
		if (ret < 0)
			return ret;
	}

	return 0;
}
#endif

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	iomsg->msg.msg_name = &iomsg->addr;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif

	return __io_recvmsg_copy_hdr(req, iomsg);
}

int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	ret = io_recvmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

static inline void io_recv_prep_retry(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);

	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  unsigned int cflags, bool mshot_finished)
{
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (!mshot_finished) {
		if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
				    cflags | IORING_CQE_F_MORE, false)) {
			io_recv_prep_retry(req);
			return false;
		}
		/*
		 * Otherwise stop multishot but use the current result.
		 * Probably will end up going into overflow, but this means
		 * we cannot trust the ordering anymore
		 */
	}

	io_req_set_res(req, *ret, cflags);

	if (req->flags & REQ_F_POLLED)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}

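/*
 * For multishot recvmsg, the selected provided buffer is laid out as a
 * struct io_uring_recvmsg_out header, followed by the (possibly truncated)
 * name and control data, followed by the payload.
 */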
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 * 1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		kmsg->fast_iov[0].iov_base = buf;
		kmsg->fast_iov[0].iov_len = len;
		iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
				len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_get_inq = 1;
	if (req->flags & REQ_F_APOLL_MULTISHOT)
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	else
		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) ==
					IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, mshot_finished))
		goto retry_multishot;

	if (mshot_finished) {
		io_netmsg_recycle(req, issue_flags);
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

	return ret;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req);
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, ret <= 0))
		goto retry_multishot;

	return ret;
}

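/*
 * Zero-copy send: data is sent straight from the user buffer, and completion
 * of the buffer is signalled through the notification slot selected by
 * slot_idx once the network stack is done with the pages.
 */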
int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sendzc *zc = io_kiocb_to_cmd(req);

	if (READ_ONCE(sqe->addr2) || READ_ONCE(sqe->__pad2[0]) ||
	    READ_ONCE(sqe->addr3))
		return -EINVAL;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (zc->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	zc->slot_idx = READ_ONCE(sqe->notification_idx);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}

int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_sendzc *zc = io_kiocb_to_cmd(req);
	struct io_notif_slot *notif_slot;
	struct io_notif *notif;
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (issue_flags & IO_URING_F_UNLOCKED)
		return -EAGAIN;
	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	notif_slot = io_get_notif_slot(ctx, zc->slot_idx);
	if (!notif_slot)
		return -EINVAL;
	notif = io_get_notif(ctx, notif_slot);
	if (!notif)
		return -ENOMEM;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	ret = import_single_range(WRITE, zc->buf, zc->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		return ret;
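	/* account the pinned user pages against RLIMIT_MEMLOCK */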
	mm_account_pinned_pages(&notif->uarg.mmp, zc->len);

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &notif->uarg;
	msg.sg_from_iter = NULL;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		return ret == -ERESTARTSYS ? -EINTR : ret;
	}

	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req);
	unsigned flags;

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	flags = READ_ONCE(sqe->ioprio);
	if (flags & ~IORING_ACCEPT_MULTISHOT)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	return 0;
}

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_accept *accept = io_kiocb_to_cmd(req);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if ((req->flags & IO_APOLL_MULTI_POLLED) ==
			    IO_APOLL_MULTI_POLLED)
				ret = IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}

	if (ret >= 0 &&
	    io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false))
		goto retry;

	io_req_set_res(req, ret, 0);
	if (req->flags & REQ_F_POLLED)
		return IOU_STOP_MULTISHOT;
	return IOU_OK;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = io_kiocb_to_cmd(req);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	return 0;
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req);
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (req_has_async_data(req)) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(connect->addr,
					  connect->addr_len,
					  &__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
				 connect->addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
		if (req_has_async_data(req))
			return -EAGAIN;
		if (io_alloc_async_data(req)) {
			ret = -ENOMEM;
			goto out;
		}
		memcpy(req->async_data, &__io, sizeof(__io));
		return -EAGAIN;
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache));
}
#endif