io_uring: add support for epoll_ctl(2) for-5.6/io_uring-vfs-next
author Jens Axboe <axboe@kernel.dk>
Wed, 8 Jan 2020 22:18:09 +0000 (15:18 -0700)
committer Jens Axboe <axboe@kernel.dk>
Wed, 15 Jan 2020 22:05:05 +0000 (15:05 -0700)
This adds IORING_OP_EPOLL_CTL, which can perform the same work as the
epoll_ctl(2) system call.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c
include/uapi/linux/io_uring.h

index ef9747c8d5fbe0115500d40231a6bd66b0f0ea55..aad51f72e3fccf818a3d57fe4dc6fb046f645d2c 100644 (file)
@@ -73,6 +73,7 @@
 #include <linux/namei.h>
 #include <linux/fsnotify.h>
 #include <linux/fadvise.h>
+#include <linux/eventpoll.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -418,6 +419,14 @@ struct io_madvise {
        u32                             advice;
 };
 
+struct io_epoll {      /* per-request state for IORING_OP_EPOLL_CTL */
+       struct file                     *file;  /* not referenced by the epoll handlers in this patch; NOTE(review): presumably shares the leading file slot of the request union - confirm */
+       int                             epfd;   /* filled from sqe->fd; first argument to do_epoll_ctl() */
+       int                             op;     /* filled from sqe->len; operation passed to do_epoll_ctl() */
+       int                             fd;     /* filled from sqe->off; target fd passed to do_epoll_ctl() */
+       struct epoll_event              event;  /* copied from user memory at sqe->addr when ep_op_has_event(op) */
+};
+
 struct io_async_connect {
        struct sockaddr_storage         address;
 };
@@ -472,6 +481,7 @@ struct io_kiocb {
                struct io_files_update  files_update;
                struct io_fadvise       fadvise;
                struct io_madvise       madvise;
+               struct io_epoll         epoll;
        };
 
        struct io_async_ctx             *io;
@@ -712,6 +722,10 @@ static const struct io_op_def io_op_defs[] = {
                .needs_file             = 1,
                .fd_non_neg             = 1,
        },
+       {
+               /* IORING_OP_EPOLL_CTL */
+               .unbound_nonreg_file    = 1,
+       },
 };
 
 static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -2561,6 +2575,54 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
        return io_openat2(req, nxt, force_nonblock);
 }
 
+/*
+ * Prepare an IORING_OP_EPOLL_CTL request from its SQE.
+ *
+ * SQE field usage: fd is the epoll instance, len is the operation handed
+ * to do_epoll_ctl(), off is the target file descriptor, and addr is a
+ * user pointer to a struct epoll_event (only read when ep_op_has_event()
+ * reports that the operation takes one).
+ *
+ * Returns 0 on success, -EINVAL if ioprio or buf_index is set, -EFAULT if
+ * the event cannot be copied from userspace, and -EOPNOTSUPP when
+ * CONFIG_EPOLL is not enabled.
+ */
+static int io_epoll_ctl_prep(struct io_kiocb *req,
+                            const struct io_uring_sqe *sqe)
+{
+#if defined(CONFIG_EPOLL)
+       /*
+        * sqe->off is deliberately not part of this check: it carries the
+        * target fd (read below), so rejecting a non-zero value would fail
+        * every request whose target is not fd 0.
+        */
+       if (sqe->ioprio || sqe->buf_index)
+               return -EINVAL;
+
+       req->epoll.epfd = READ_ONCE(sqe->fd);
+       req->epoll.op = READ_ONCE(sqe->len);
+       req->epoll.fd = READ_ONCE(sqe->off);
+
+       if (ep_op_has_event(req->epoll.op)) {
+               struct epoll_event __user *ev;
+
+               /* Snapshot the event now so issue does not touch user memory */
+               ev = u64_to_user_ptr(READ_ONCE(sqe->addr));
+               if (copy_from_user(&req->epoll.event, ev, sizeof(*ev)))
+                       return -EFAULT;
+       }
+
+       return 0;
+#else
+       return -EOPNOTSUPP;
+#endif
+}
+
+/*
+ * Issue an IORING_OP_EPOLL_CTL request using the values stored in
+ * req->epoll.
+ *
+ * If do_epoll_ctl() reports -EAGAIN while force_nonblock is set, the
+ * request's work item is tagged with IO_WQ_WORK_NEEDS_FILES and -EAGAIN
+ * is handed back without posting a completion.  Otherwise the result is
+ * posted with io_cqring_add_event() (after req_set_fail_links() on a
+ * negative result), io_put_req_find_next() is called, and 0 is returned.
+ * Without CONFIG_EPOLL this always returns -EOPNOTSUPP.
+ */
+static int io_epoll_ctl(struct io_kiocb *req, struct io_kiocb **nxt,
+                       bool force_nonblock)
+{
+#if defined(CONFIG_EPOLL)
+       struct io_epoll *ep = &req->epoll;
+       int res = do_epoll_ctl(ep->epfd, ep->op, ep->fd, &ep->event,
+                              force_nonblock);
+
+       if (res == -EAGAIN && force_nonblock) {
+               req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+               return -EAGAIN;
+       }
+
+       if (res < 0)
+               req_set_fail_links(req);
+       io_cqring_add_event(req, res);
+       io_put_req_find_next(req, nxt);
+       return 0;
+#else
+       return -EOPNOTSUPP;
+#endif
+}
+
 static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
@@ -4022,6 +4084,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
        case IORING_OP_OPENAT2:
                ret = io_openat2_prep(req, sqe);
                break;
+       case IORING_OP_EPOLL_CTL:
+               ret = io_epoll_ctl_prep(req, sqe);
+               break;
        default:
                printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
                                req->opcode);
@@ -4250,6 +4315,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                }
                ret = io_openat2(req, nxt, force_nonblock);
                break;
+       case IORING_OP_EPOLL_CTL:
+               if (sqe) {
+                       ret = io_epoll_ctl_prep(req, sqe);
+                       if (ret)
+                               break;
+               }
+               ret = io_epoll_ctl(req, nxt, force_nonblock);
+               break;
        default:
                ret = -EINVAL;
                break;
index fea7da1828514bc944d6f117d4484ce794173bf6..c5514d658a3d94693fcbb4d08e3e66740a3e83c9 100644 (file)
@@ -93,6 +93,7 @@ enum {
        IORING_OP_SEND,
        IORING_OP_RECV,
        IORING_OP_OPENAT2,
+       IORING_OP_EPOLL_CTL,
 
        /* this goes last, obviously */
        IORING_OP_LAST,