io_uring/rw: don't lose partial IO result on fail
io_uring/net.c (linux-2.6-block.git)

// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	unsigned len;
	unsigned done_io;
	unsigned msg_flags;
	u16 flags;
	/* used only for sendzc */
	u16 addr_len;
	void __user *addr;
	struct io_kiocb *notif;
};

#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

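/*
 * Retry a partially-completed transfer only when MSG_WAITALL was requested
 * and the socket is stream- or seqpacket-oriented, where continuing from
 * the current offset is well defined.
 */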
static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

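/*
 * Stash the async msghdr in the per-ring cache instead of freeing it, so a
 * later request can reuse the allocation. Only possible when the issuer
 * holds the ring lock (i.e. not IO_URING_F_UNLOCKED).
 */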
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
						  unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;

	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
		struct io_async_msghdr *hdr;

		hdr = container_of(entry, struct io_async_msghdr, cache);
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req))
		return req->async_data;

	return NULL;
}

static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
{
	/* ->prep_async is always called from the submission context */
	return io_msg_alloc_async(req, 0);
}

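/*
 * Copy the on-stack msghdr into dynamically allocated async data so the
 * request can be retried later; the caller sees -EAGAIN and requeues.
 */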
static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg;

	if (req_has_async_data(req))
		return -EAGAIN;
	async_msg = io_msg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	async_msg->msg.msg_name = &async_msg->addr;
	/* if we're using fast_iov, set it to the new one */
	if (!async_msg->free_iov)
		async_msg->msg.msg_iter.iov = async_msg->fast_iov;

	return -EAGAIN;
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->free_iov = iomsg->fast_iov;
	return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
				   &iomsg->free_iov);
}

int io_sendzc_prep_async(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io;
	int ret;

	if (!zc->addr || req_has_async_data(req))
		return 0;
	io = io_msg_alloc_async_prep(req);
	if (!io)
		return -ENOMEM;
	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
	return ret;
}

static int io_setup_async_addr(struct io_kiocb *req,
			       struct sockaddr_storage *addr,
			       unsigned int issue_flags)
{
	struct io_async_msghdr *io;

	if (!addr || req_has_async_data(req))
		return -EAGAIN;
	io = io_msg_alloc_async(req, issue_flags);
	if (!io)
		return -ENOMEM;
	memcpy(&io->addr, addr, sizeof(io->addr));
	return -EAGAIN;
}

int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
{
	int hdr;

	if (iomsg->namelen < 0)
		return true;
	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
			       iomsg->namelen, &hdr))
		return true;
	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
		return true;

	return false;
}

static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		if (ret > 0)
			ret = 0;
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg.msg_iovlen == 0) {
			sr->len = 0;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, (struct iovec __user *)uiov, msg.msg_iovlen,
				     UIO_FASTIOV, &iomsg->free_iov,
				     &iomsg->msg.msg_iter, true);
		if (ret < 0)
			return ret;
	}

	return 0;
}
#endif

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	iomsg->msg.msg_name = &iomsg->addr;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif

	return __io_recvmsg_copy_hdr(req, iomsg);
}

int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	if (!io_msg_alloc_async_prep(req))
		return -ENOMEM;
	ret = io_recvmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

static inline void io_recv_prep_retry(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  unsigned int cflags, bool mshot_finished)
{
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (!mshot_finished) {
		if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
				    cflags | IORING_CQE_F_MORE, false)) {
			io_recv_prep_retry(req);
			return false;
		}
		/*
		 * Otherwise stop multishot but use the current result.
		 * Probably will end up going into overflow, but this means
		 * we cannot trust the ordering anymore
		 */
	}

	io_req_set_res(req, *ret, cflags);

	if (req->flags & REQ_F_POLLED)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}

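/*
 * For multishot recvmsg, carve the selected buffer into an
 * io_uring_recvmsg_out header plus name/control areas, with the payload
 * placed after them.
 */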
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

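/*
 * Receive into the tail of the selected buffer and then copy the
 * io_uring_recvmsg_out header (plus any source address) to its start, so
 * userspace gets a self-describing record per completion.
 */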
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 * "fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

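/*
 * Single-shot and multishot receive with a user msghdr. A multishot
 * completion posts an aux CQE and jumps back to retry_multishot to pick a
 * fresh provided buffer; a short transfer with MSG_WAITALL stashes the
 * partial progress in sr->done_io and retries the request.
 */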
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		kmsg->fast_iov[0].iov_base = buf;
		kmsg->fast_iov[0].iov_len = len;
		iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
				len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_get_inq = 1;
	if (req->flags & REQ_F_APOLL_MULTISHOT)
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	else
		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) ==
			    IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, mshot_finished))
		goto retry_multishot;

	if (mshot_finished) {
		io_netmsg_recycle(req, issue_flags);
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

	return ret;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, ret <= 0))
		goto retry_multishot;

	return ret;
}

void io_sendzc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);

	zc->notif->flags |= REQ_F_CQE_SKIP;
	io_notif_flush(zc->notif);
	zc->notif = NULL;
}

int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) ||
	    READ_ONCE(sqe->__pad3[0]))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
			  IORING_RECVSEND_FIXED_BUF))
		return -EINVAL;
	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;
	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	zc->addr_len = READ_ONCE(sqe->addr_len);
	zc->done_io = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}

static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
}

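/*
 * Map the caller's bvec pages straight into the skb frags without copying
 * or taking page references; the pages stay pinned via the registered
 * buffer, so the frags are marked as managed.
 */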
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;

	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}
	return ret;
}

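/*
 * Zero-copy send. The completion carries IORING_CQE_F_MORE while the data
 * may still be referenced by the network stack; the separate notification
 * CQE signals when the buffer can be reused. Partial progress is kept in
 * zc->done_io so a retried send never reports a short result as a failure.
 */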
int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address, *addr = NULL;
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned msg_flags, cflags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	if (zc->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = addr = &io->addr;
		} else {
			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
			addr = &__address;
		}
		msg.msg_namelen = zc->addr_len;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_addr(req, addr, issue_flags);

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
					(u64)(uintptr_t)zc->buf, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter;
	} else {
		ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
					  &msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(zc->notif, zc->len);
		if (unlikely(ret))
			return ret;
		msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, addr, issue_flags);

		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, addr, issue_flags);
		}
		if (ret < 0 && !zc->done_io)
			zc->notif->flags |= REQ_F_CQE_SKIP;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	io_notif_flush(zc->notif);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	cflags = ret >= 0 ? IORING_CQE_F_MORE : 0;
	io_req_set_res(req, ret, cflags);
	return IOU_OK;
}

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	unsigned flags;

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	flags = READ_ONCE(sqe->ioprio);
	if (flags & ~IORING_ACCEPT_MULTISHOT)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	return 0;
}

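/*
 * For multishot accept, each accepted connection is posted as an aux CQE
 * with IORING_CQE_F_MORE and the handler loops back to accept again; the
 * request only completes once posting fails or an error is hit.
 */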
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if ((req->flags & IO_APOLL_MULTI_POLLED) ==
			    IO_APOLL_MULTI_POLLED)
				ret = IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  accept->file_slot);
	}

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}

	if (ret >= 0 &&
	    io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false))
		goto retry;

	io_req_set_res(req, ret, 0);
	if (req->flags & REQ_F_POLLED)
		return IOU_STOP_MULTISHOT;
	return IOU_OK;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	return 0;
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (req_has_async_data(req)) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(connect->addr,
					  connect->addr_len,
					  &__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
				 connect->addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
		if (req_has_async_data(req))
			return -EAGAIN;
		if (io_alloc_async_data(req)) {
			ret = -ENOMEM;
			goto out;
		}
		memcpy(req->async_data, &__io, sizeof(__io));
		return -EAGAIN;
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache));
}
#endif