io_uring: allow POLL_ADD with double poll_wait() users
author Jens Axboe <axboe@kernel.dk>
Mon, 10 Feb 2020 20:48:46 +0000 (13:48 -0700)
committer Jens Axboe <axboe@kernel.dk>
Thu, 13 Feb 2020 00:44:07 +0000 (17:44 -0700)
Some file descriptors use separate waitqueues for their f_ops->poll()
handler, most commonly one for read and one for write. The io_uring
poll implementation doesn't work with that, as the 2nd poll_wait()
call causes the io_uring poll request to fail with -EINVAL.
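
For illustration only (not part of this change), an f_ops->poll() handler
of roughly the following shape is what triggers the two queue_proc calls;
the example_dev structure and its fields are hypothetical, standing in for
drivers like the pipe code that keep one waitqueue per direction:

    #include <linux/fs.h>
    #include <linux/poll.h>
    #include <linux/wait.h>

    /* hypothetical device with one waitqueue per direction, like a pipe */
    struct example_dev {
            wait_queue_head_t rd_wait;
            wait_queue_head_t wr_wait;
            bool readable;
            bool writable;
    };

    static __poll_t example_poll(struct file *file, poll_table *wait)
    {
            struct example_dev *dev = file->private_data;
            __poll_t mask = 0;

            /* each poll_wait() invokes the poll table's queue_proc */
            poll_wait(file, &dev->rd_wait, wait);   /* 1st call is accepted */
            poll_wait(file, &dev->wr_wait, wait);   /* 2nd call hits -EINVAL today */

            if (dev->readable)
                    mask |= EPOLLIN | EPOLLRDNORM;
            if (dev->writable)
                    mask |= EPOLLOUT | EPOLLWRNORM;
            return mask;
    }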

This is particularly a problem now that pipes were switched to using
multiple wait queues (commit 0ddad21d3e99), but it also affects tty
devices and /dev/random. The result is a real problem for event loops,
where some file descriptors work and others don't.

With this fix, io_uring handles files whose ->poll() handler registers
on multiple waitqueues.
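
As a quick userspace check, a minimal liburing sketch along these lines
(error handling omitted, liburing assumed installed) completes with the
poll mask in cqe->res on a pipe fd, where before this fix the request
would fail with -EINVAL:

    #include <liburing.h>
    #include <poll.h>
    #include <unistd.h>
    #include <stdio.h>

    int main(void)
    {
            struct io_uring ring;
            struct io_uring_sqe *sqe;
            struct io_uring_cqe *cqe;
            int fds[2];

            pipe(fds);
            io_uring_queue_init(8, &ring, 0);

            /* arm a single-shot poll on the pipe read end */
            sqe = io_uring_get_sqe(&ring);
            io_uring_prep_poll_add(sqe, fds[0], POLLIN);
            io_uring_submit(&ring);

            /* make the read side readable so the poll fires */
            write(fds[1], "x", 1);

            /* res is the poll mask on success, -EINVAL without this fix */
            io_uring_wait_cqe(&ring, &cqe);
            printf("poll res: %d\n", cqe->res);
            io_uring_cqe_seen(&ring, cqe);

            io_uring_queue_exit(&ring);
            return 0;
    }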

Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c

index 9cd2ce3b8ad9758033c7f3c00ec680134514a8ae..9f00f30e1790565efeb0866d9feea788b6779d73 100644
@@ -3440,10 +3440,27 @@ out:
 #endif
 }
 
+static void io_poll_remove_double(struct io_kiocb *req)
+{
+       struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+
+       if (poll && poll->head) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&poll->head->lock, flags);
+               list_del_init(&poll->wait.entry);
+               if (poll->wait.private)
+                       refcount_dec(&req->refs);
+               spin_unlock_irqrestore(&poll->head->lock, flags);
+       }
+}
+
 static void io_poll_remove_one(struct io_kiocb *req)
 {
        struct io_poll_iocb *poll = &req->poll;
 
+       io_poll_remove_double(req);
+
        spin_lock(&poll->head->lock);
        WRITE_ONCE(poll->canceled, true);
        if (!list_empty(&poll->wait.entry)) {
@@ -3679,10 +3696,38 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
        if (mask && !(mask & poll->events))
                return 0;
 
+       io_poll_remove_double(req);
        __io_poll_wake(req, &req->poll, mask);
        return 1;
 }
 
+static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
+                              int sync, void *key)
+{
+       struct io_kiocb *req = wait->private;
+       struct io_poll_iocb *poll = (void *) req->io;
+       __poll_t mask = key_to_poll(key);
+       bool done = true;
+
+       /* for instances that support it check for an event match first: */
+       if (mask && !(mask & poll->events))
+               return 0;
+
+       if (req->poll.head) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&req->poll.head->lock, flags);
+               done = list_empty(&req->poll.wait.entry);
+               if (!done)
+                       list_del_init(&req->poll.wait.entry);
+               spin_unlock_irqrestore(&req->poll.head->lock, flags);
+       }
+       if (!done)
+               __io_poll_wake(req, poll, mask);
+       refcount_dec(&req->refs);
+       return 1;
+}
+
 struct io_poll_table {
        struct poll_table_struct pt;
        struct io_kiocb *req;
@@ -3693,15 +3738,38 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
                               struct poll_table_struct *p)
 {
        struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+       struct io_kiocb *req = pt->req;
+       struct io_poll_iocb *poll = &req->poll;
 
-       if (unlikely(pt->req->poll.head)) {
-               pt->error = -EINVAL;
-               return;
+       /*
+        * If poll->head is already set, it's because the file being polled
+        * uses multiple waitqueues for poll handling (eg one for read, one
+        * for write). Setup a separate io_poll_iocb if this happens.
+        */
+       if (unlikely(poll->head)) {
+               /* already have a 2nd entry, fail a third attempt */
+               if (req->io) {
+                       pt->error = -EINVAL;
+                       return;
+               }
+               poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
+               if (!poll) {
+                       pt->error = -ENOMEM;
+                       return;
+               }
+               poll->done = false;
+               poll->canceled = false;
+               poll->events = req->poll.events;
+               INIT_LIST_HEAD(&poll->wait.entry);
+               init_waitqueue_func_entry(&poll->wait, io_poll_double_wake);
+               refcount_inc(&req->refs);
+               poll->wait.private = req;
+               req->io = (void *) poll;
        }
 
        pt->error = 0;
-       pt->req->poll.head = head;
-       add_wait_queue(head, &pt->req->poll.wait);
+       poll->head = head;
+       add_wait_queue(head, &poll->wait);
 }
 
 static void io_poll_req_insert(struct io_kiocb *req)
@@ -3778,6 +3846,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
        }
        if (mask) { /* no async, we'd stolen it */
                ipt.error = 0;
+               io_poll_remove_double(req);
                io_poll_complete(req, mask, 0);
        }
        spin_unlock_irq(&ctx->completion_lock);