io_uring: add support for epoll_ctl(2) [for-5.6/io_uring-epoll-test]
author Jens Axboe <axboe@kernel.dk>
Wed, 8 Jan 2020 22:18:09 +0000 (15:18 -0700)
committer Jens Axboe <axboe@kernel.dk>
Wed, 22 Jan 2020 16:37:20 +0000 (09:37 -0700)
This adds IORING_OP_EPOLL_CTL, which can perform the same work as the
epoll_ctl(2) system call.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c
include/uapi/linux/io_uring.h

index 09503d1e9e45697f2208a57fb5186c6d5cd5ced2..64dc9e5df6d4e992e3376968f3ad183d0644fb49 100644 (file)
@@ -74,6 +74,7 @@
 #include <linux/namei.h>
 #include <linux/fsnotify.h>
 #include <linux/fadvise.h>
+#include <linux/eventpoll.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -421,6 +422,14 @@ struct io_madvise {
        u32                             advice;
 };
 
+struct io_epoll {      /* per-request state for IORING_OP_EPOLL_CTL */
+       struct file                     *file;  /* not read or written by io_epoll_ctl_prep()/io_epoll_ctl() */
+       int                             epfd;   /* from sqe->fd; 1st argument to do_epoll_ctl() */
+       int                             op;     /* from sqe->len; 2nd argument to do_epoll_ctl() */
+       int                             fd;     /* from sqe->off; 3rd argument to do_epoll_ctl() */
+       struct epoll_event              event;  /* copied from user memory at sqe->addr when ep_op_has_event(op) */
+};
+
 struct io_async_connect {
        struct sockaddr_storage         address;
 };
@@ -534,6 +543,7 @@ struct io_kiocb {
                struct io_files_update  files_update;
                struct io_fadvise       fadvise;
                struct io_madvise       madvise;
+               struct io_epoll         epoll;
        };
 
        struct io_async_ctx             *io;
@@ -719,6 +729,9 @@ static const struct io_op_def io_op_defs[] = {
                .needs_file             = 1,
                .fd_non_neg             = 1,
        },
+       [IORING_OP_EPOLL_CTL] = {
+               .unbound_nonreg_file    = 1,
+       },
 };
 
 static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -2578,6 +2591,54 @@ static int io_openat(struct io_kiocb *req, struct io_kiocb **nxt,
        return io_openat2(req, nxt, force_nonblock);
 }
 
+static int io_epoll_ctl_prep(struct io_kiocb *req,
+                            const struct io_uring_sqe *sqe)
+{      /* unpack an IORING_OP_EPOLL_CTL SQE into req->epoll; returns 0 or a negative errno */
+#if defined(CONFIG_EPOLL)
+       if (sqe->ioprio || sqe->buf_index)      /* these SQE fields must be zero for this opcode */
+               return -EINVAL;
+
+       req->epoll.epfd = READ_ONCE(sqe->fd);   /* sqe->fd  -> epfd */
+       req->epoll.op = READ_ONCE(sqe->len);    /* sqe->len -> op */
+       req->epoll.fd = READ_ONCE(sqe->off);    /* sqe->off -> fd */
+
+       if (ep_op_has_event(req->epoll.op)) {   /* only copy the event when the helper says this op has one */
+               struct epoll_event __user *ev;
+
+               ev = u64_to_user_ptr(READ_ONCE(sqe->addr));     /* sqe->addr holds the user pointer to the event */
+               if (copy_from_user(&req->epoll.event, ev, sizeof(*ev)))
+                       return -EFAULT; /* user copy failed */
+       }
+
+       return 0;
+#else  /* !CONFIG_EPOLL */
+       return -EOPNOTSUPP;
+#endif /* CONFIG_EPOLL */
+}
+
+static int io_epoll_ctl(struct io_kiocb *req, struct io_kiocb **nxt,
+                       bool force_nonblock)
+{      /* run do_epoll_ctl() with the values stored in req->epoll and report its result */
+#if defined(CONFIG_EPOLL)
+       struct io_epoll *ie = &req->epoll;
+       int ret;
+
+       ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock);
+       if (force_nonblock && ret == -EAGAIN) { /* nonblocking attempt could not complete */
+               req->work.flags |= IO_WQ_WORK_NEEDS_FILES;      /* NOTE(review): presumably so a later async retry has the files table - confirm */
+               return -EAGAIN; /* no completion is posted on this path */
+       }
+
+       if (ret < 0)    /* any other negative result is treated as a failure */
+               req_set_fail_links(req);
+       io_cqring_add_event(req, ret);  /* ret (success or error) is handed on as the request's result */
+       io_put_req_find_next(req, nxt);
+       return 0;       /* do_epoll_ctl() errors are reported via io_cqring_add_event(), not the return value */
+#else  /* !CONFIG_EPOLL */
+       return -EOPNOTSUPP;
+#endif /* CONFIG_EPOLL */
+}
+
 static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
@@ -4039,6 +4100,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
        case IORING_OP_OPENAT2:
                ret = io_openat2_prep(req, sqe);
                break;
+       case IORING_OP_EPOLL_CTL:
+               ret = io_epoll_ctl_prep(req, sqe);
+               break;
        default:
                printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
                                req->opcode);
@@ -4267,6 +4331,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                }
                ret = io_openat2(req, nxt, force_nonblock);
                break;
+       case IORING_OP_EPOLL_CTL:
+               if (sqe) {
+                       ret = io_epoll_ctl_prep(req, sqe);
+                       if (ret)
+                               break;
+               }
+               ret = io_epoll_ctl(req, nxt, force_nonblock);
+               break;
        default:
                ret = -EINVAL;
                break;
index 57d05cc5e27159cb9ff93ee204907deca9b262d5..cffa6fd33827bbb66e34316ba659de42442f6987 100644 (file)
@@ -106,6 +106,7 @@ enum {
        IORING_OP_SEND,
        IORING_OP_RECV,
        IORING_OP_OPENAT2,
+       IORING_OP_EPOLL_CTL,
 
        /* this goes last, obviously */
        IORING_OP_LAST,