io_uring/poll.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 #include <linux/kernel.h>
   3 #include <linux/errno.h>
   4 #include <linux/fs.h>
   5 #include <linux/file.h>
   6 #include <linux/mm.h>
   7 #include <linux/slab.h>
   8 #include <linux/poll.h>
   9 #include <linux/hashtable.h>
  10 #include <linux/io_uring.h>
  11
  12 #include <trace/events/io_uring.h>
  13
  14 #include <uapi/linux/io_uring.h>
  15
  16 #include "io_uring_types.h"
  17 #include "io_uring.h"
  18 #include "refs.h"
  19 #include "opdef.h"
  20 #include "kbuf.h"
  21 #include "poll.h"
  22
  23 struct io_poll_update {
  24         struct file                     *file;
  25         u64                             old_user_data;
  26         u64                             new_user_data;
  27         __poll_t                        events;
  28         bool                            update_events;
  29         bool                            update_user_data;
  30 };
  31
  32 struct io_poll_table {
  33         struct poll_table_struct pt;
  34         struct io_kiocb *req;
  35         int nr_entries;
  36         int error;
  37 };
  38
  39 #define IO_POLL_CANCEL_FLAG     BIT(31)
  40 #define IO_POLL_REF_MASK        GENMASK(30, 0)
  41
  42 /*
  43  * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
  44  * bump it and acquire ownership. It's disallowed to modify requests while not
  45  * owning it, that prevents from races for enqueueing task_work's and b/w
  46  * arming poll and wakeups.
  47  */
  48 static inline bool io_poll_get_ownership(struct io_kiocb *req)
  49 {
  50         return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
  51 }
  52
  53 static void io_poll_mark_cancelled(struct io_kiocb *req)
  54 {
  55         atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
  56 }
  57
  58 static struct io_poll *io_poll_get_double(struct io_kiocb *req)
  59 {
  60         /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
  61         if (req->opcode == IORING_OP_POLL_ADD)
  62                 return req->async_data;
  63         return req->apoll->double_poll;
  64 }
  65
  66 static struct io_poll *io_poll_get_single(struct io_kiocb *req)
  67 {
  68         if (req->opcode == IORING_OP_POLL_ADD)
  69                 return io_kiocb_to_cmd(req);
  70         return &req->apoll->poll;
  71 }
  72
  73 static void io_poll_req_insert(struct io_kiocb *req)
  74 {
  75         struct io_ring_ctx *ctx = req->ctx;
  76         struct hlist_head *list;
  77
  78         list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)];
  79         hlist_add_head(&req->hash_node, list);
  80 }
  81
  82 static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
  83                               wait_queue_func_t wake_func)
  84 {
  85         poll->head = NULL;
  86 #define IO_POLL_UNMASK  (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
  87         /* mask in events that we always want/need */
  88         poll->events = events | IO_POLL_UNMASK;
  89         INIT_LIST_HEAD(&poll->wait.entry);
  90         init_waitqueue_func_entry(&poll->wait, wake_func);
  91 }
  92
  93 static inline void io_poll_remove_entry(struct io_poll *poll)
  94 {
  95         struct wait_queue_head *head = smp_load_acquire(&poll->head);
  96
  97         if (head) {
  98                 spin_lock_irq(&head->lock);
  99                 list_del_init(&poll->wait.entry);
 100                 poll->head = NULL;
 101                 spin_unlock_irq(&head->lock);
 102         }
 103 }
 104
 105 static void io_poll_remove_entries(struct io_kiocb *req)
 106 {
 107         /*
 108          * Nothing to do if neither of those flags are set. Avoid dipping
 109          * into the poll/apoll/double cachelines if we can.
 110          */
 111         if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
 112                 return;
 113
 114         /*
 115          * While we hold the waitqueue lock and the waitqueue is nonempty,
 116          * wake_up_pollfree() will wait for us.  However, taking the waitqueue
 117          * lock in the first place can race with the waitqueue being freed.
 118          *
 119          * We solve this as eventpoll does: by taking advantage of the fact that
 120          * all users of wake_up_pollfree() will RCU-delay the actual free.  If
 121          * we enter rcu_read_lock() and see that the pointer to the queue is
 122          * non-NULL, we can then lock it without the memory being freed out from
 123          * under us.
 124          *
 125          * Keep holding rcu_read_lock() as long as we hold the queue lock, in
 126          * case the caller deletes the entry from the queue, leaving it empty.
 127          * In that case, only RCU prevents the queue memory from being freed.
 128          */
 129         rcu_read_lock();
 130         if (req->flags & REQ_F_SINGLE_POLL)
 131                 io_poll_remove_entry(io_poll_get_single(req));
 132         if (req->flags & REQ_F_DOUBLE_POLL)
 133                 io_poll_remove_entry(io_poll_get_double(req));
 134         rcu_read_unlock();
 135 }
 136
 137 /*
 138  * All poll tw should go through this. Checks for poll events, manages
 139  * references, does rewait, etc.
 140  *
 141  * Returns a negative error on failure. >0 when no action require, which is
 142  * either spurious wakeup or multishot CQE is served. 0 when it's done with
 143  * the request, then the mask is stored in req->cqe.res.
 144  */
 145 static int io_poll_check_events(struct io_kiocb *req, bool *locked)
 146 {
 147         struct io_ring_ctx *ctx = req->ctx;
 148         int v, ret;
 149
 150         /* req->task == current here, checking PF_EXITING is safe */
 151         if (unlikely(req->task->flags & PF_EXITING))
 152                 return -ECANCELED;
 153
 154         do {
 155                 v = atomic_read(&req->poll_refs);
 156
 157                 /* tw handler should be the owner, and so have some references */
 158                 if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
 159                         return 0;
 160                 if (v & IO_POLL_CANCEL_FLAG)
 161                         return -ECANCELED;
 162
 163                 if (!req->cqe.res) {
 164                         struct poll_table_struct pt = { ._key = req->apoll_events };
 165                         req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
 166                 }
 167
 168                 if ((unlikely(!req->cqe.res)))
 169                         continue;
 170                 if (req->apoll_events & EPOLLONESHOT)
 171                         return 0;
 172
 173                 /* multishot, just fill a CQE and proceed */
 174                 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
 175                         __poll_t mask = mangle_poll(req->cqe.res &
 176                                                     req->apoll_events);
 177                         bool filled;
 178
 179                         spin_lock(&ctx->completion_lock);
 180                         filled = io_fill_cqe_aux(ctx, req->cqe.user_data,
 181                                                  mask, IORING_CQE_F_MORE);
 182                         io_commit_cqring(ctx);
 183                         spin_unlock(&ctx->completion_lock);
 184                         if (filled) {
 185                                 io_cqring_ev_posted(ctx);
 186                                 continue;
 187                         }
 188                         return -ECANCELED;
 189                 }
 190
 191                 ret = io_poll_issue(req, locked);
 192                 if (ret)
 193                         return ret;
 194
 195                 /*
 196                  * Release all references, retry if someone tried to restart
 197                  * task_work while we were executing it.
 198                  */
 199         } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));
 200
 201         return 1;
 202 }
 203
 204 static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 205 {
 206         struct io_ring_ctx *ctx = req->ctx;
 207         int ret;
 208
 209         ret = io_poll_check_events(req, locked);
 210         if (ret > 0)
 211                 return;
 212
 213         if (!ret) {
 214                 struct io_poll *poll = io_kiocb_to_cmd(req);
 215
 216                 req->cqe.res = mangle_poll(req->cqe.res & poll->events);
 217         } else {
 218                 req->cqe.res = ret;
 219                 req_set_fail(req);
 220         }
 221
 222         io_poll_remove_entries(req);
 223         spin_lock(&ctx->completion_lock);
 224         hash_del(&req->hash_node);
 225         req->cqe.flags = 0;
 226         __io_req_complete_post(req);
 227         io_commit_cqring(ctx);
 228         spin_unlock(&ctx->completion_lock);
 229         io_cqring_ev_posted(ctx);
 230 }
 231
 232 static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
 233 {
 234         struct io_ring_ctx *ctx = req->ctx;
 235         int ret;
 236
 237         ret = io_poll_check_events(req, locked);
 238         if (ret > 0)
 239                 return;
 240
 241         io_poll_remove_entries(req);
 242         spin_lock(&ctx->completion_lock);
 243         hash_del(&req->hash_node);
 244         spin_unlock(&ctx->completion_lock);
 245
 246         if (!ret)
 247                 io_req_task_submit(req, locked);
 248         else
 249                 io_req_complete_failed(req, ret);
 250 }
 251
 252 static void __io_poll_execute(struct io_kiocb *req, int mask,
 253                               __poll_t __maybe_unused events)
 254 {
 255         io_req_set_res(req, mask, 0);
 256         /*
 257          * This is useful for poll that is armed on behalf of another
 258          * request, and where the wakeup path could be on a different
 259          * CPU. We want to avoid pulling in req->apoll->events for that
 260          * case.
 261          */
 262         if (req->opcode == IORING_OP_POLL_ADD)
 263                 req->io_task_work.func = io_poll_task_func;
 264         else
 265                 req->io_task_work.func = io_apoll_task_func;
 266
 267         trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
 268         io_req_task_work_add(req);
 269 }
 270
 271 static inline void io_poll_execute(struct io_kiocb *req, int res,
 272                 __poll_t events)
 273 {
 274         if (io_poll_get_ownership(req))
 275                 __io_poll_execute(req, res, events);
 276 }
 277
 278 static void io_poll_cancel_req(struct io_kiocb *req)
 279 {
 280         io_poll_mark_cancelled(req);
 281         /* kick tw, which should complete the request */
 282         io_poll_execute(req, 0, 0);
 283 }
 284
 285 #define wqe_to_req(wait)        ((void *)((unsigned long) (wait)->private & ~1))
 286 #define wqe_is_double(wait)     ((unsigned long) (wait)->private & 1)
 287 #define IO_ASYNC_POLL_COMMON    (EPOLLONESHOT | EPOLLPRI)
 288
 289 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 290                         void *key)
 291 {
 292         struct io_kiocb *req = wqe_to_req(wait);
 293         struct io_poll *poll = container_of(wait, struct io_poll, wait);
 294         __poll_t mask = key_to_poll(key);
 295
 296         if (unlikely(mask & POLLFREE)) {
 297                 io_poll_mark_cancelled(req);
 298                 /* we have to kick tw in case it's not already */
 299                 io_poll_execute(req, 0, poll->events);
 300
 301                 /*
 302                  * If the waitqueue is being freed early but someone is already
 303                  * holds ownership over it, we have to tear down the request as
 304                  * best we can. That means immediately removing the request from
 305                  * its waitqueue and preventing all further accesses to the
 306                  * waitqueue via the request.
 307                  */
 308                 list_del_init(&poll->wait.entry);
 309
 310                 /*
 311                  * Careful: this *must* be the last step, since as soon
 312                  * as req->head is NULL'ed out, the request can be
 313                  * completed and freed, since aio_poll_complete_work()
 314                  * will no longer need to take the waitqueue lock.
 315                  */
 316                 smp_store_release(&poll->head, NULL);
 317                 return 1;
 318         }
 319
 320         /* for instances that support it check for an event match first */
 321         if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
 322                 return 0;
 323
 324         if (io_poll_get_ownership(req)) {
 325                 /* optional, saves extra locking for removal in tw handler */
 326                 if (mask && poll->events & EPOLLONESHOT) {
 327                         list_del_init(&poll->wait.entry);
 328                         poll->head = NULL;
 329                         if (wqe_is_double(wait))
 330                                 req->flags &= ~REQ_F_DOUBLE_POLL;
 331                         else
 332                                 req->flags &= ~REQ_F_SINGLE_POLL;
 333                 }
 334                 __io_poll_execute(req, mask, poll->events);
 335         }
 336         return 1;
 337 }
 338
 339 static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
 340                             struct wait_queue_head *head,
 341                             struct io_poll **poll_ptr)
 342 {
 343         struct io_kiocb *req = pt->req;
 344         unsigned long wqe_private = (unsigned long) req;
 345
 346         /*
 347          * The file being polled uses multiple waitqueues for poll handling
 348          * (e.g. one for read, one for write). Setup a separate io_poll
 349          * if this happens.
 350          */
 351         if (unlikely(pt->nr_entries)) {
 352                 struct io_poll *first = poll;
 353
 354                 /* double add on the same waitqueue head, ignore */
 355                 if (first->head == head)
 356                         return;
 357                 /* already have a 2nd entry, fail a third attempt */
 358                 if (*poll_ptr) {
 359                         if ((*poll_ptr)->head == head)
 360                                 return;
 361                         pt->error = -EINVAL;
 362                         return;
 363                 }
 364
 365                 poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
 366                 if (!poll) {
 367                         pt->error = -ENOMEM;
 368                         return;
 369                 }
 370                 /* mark as double wq entry */
 371                 wqe_private |= 1;
 372                 req->flags |= REQ_F_DOUBLE_POLL;
 373                 io_init_poll_iocb(poll, first->events, first->wait.func);
 374                 *poll_ptr = poll;
 375                 if (req->opcode == IORING_OP_POLL_ADD)
 376                         req->flags |= REQ_F_ASYNC_DATA;
 377         }
 378
 379         req->flags |= REQ_F_SINGLE_POLL;
 380         pt->nr_entries++;
 381         poll->head = head;
 382         poll->wait.private = (void *) wqe_private;
 383
 384         if (poll->events & EPOLLEXCLUSIVE)
 385                 add_wait_queue_exclusive(head, &poll->wait);
 386         else
 387                 add_wait_queue(head, &poll->wait);
 388 }
 389
 390 static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 391                                struct poll_table_struct *p)
 392 {
 393         struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
 394         struct io_poll *poll = io_kiocb_to_cmd(pt->req);
 395
 396         __io_queue_proc(poll, pt, head,
 397                         (struct io_poll **) &pt->req->async_data);
 398 }
 399
 400 static int __io_arm_poll_handler(struct io_kiocb *req,
 401                                  struct io_poll *poll,
 402                                  struct io_poll_table *ipt, __poll_t mask)
 403 {
 404         struct io_ring_ctx *ctx = req->ctx;
 405         int v;
 406
 407         INIT_HLIST_NODE(&req->hash_node);
 408         req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
 409         io_init_poll_iocb(poll, mask, io_poll_wake);
 410         poll->file = req->file;
 411
 412         req->apoll_events = poll->events;
 413
 414         ipt->pt._key = mask;
 415         ipt->req = req;
 416         ipt->error = 0;
 417         ipt->nr_entries = 0;
 418
 419         /*
 420          * Take the ownership to delay any tw execution up until we're done
 421          * with poll arming. see io_poll_get_ownership().
 422          */
 423         atomic_set(&req->poll_refs, 1);
 424         mask = vfs_poll(req->file, &ipt->pt) & poll->events;
 425
 426         if (mask &&
 427            ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
 428                 io_poll_remove_entries(req);
 429                 /* no one else has access to the req, forget about the ref */
 430                 return mask;
 431         }
 432
 433         if (!mask && unlikely(ipt->error || !ipt->nr_entries)) {
 434                 io_poll_remove_entries(req);
 435                 if (!ipt->error)
 436                         ipt->error = -EINVAL;
 437                 return 0;
 438         }
 439
 440         spin_lock(&ctx->completion_lock);
 441         io_poll_req_insert(req);
 442         spin_unlock(&ctx->completion_lock);
 443
 444         if (mask && (poll->events & EPOLLET)) {
 445                 /* can't multishot if failed, just queue the event we've got */
 446                 if (unlikely(ipt->error || !ipt->nr_entries)) {
 447                         poll->events |= EPOLLONESHOT;
 448                         req->apoll_events |= EPOLLONESHOT;
 449                         ipt->error = 0;
 450                 }
 451                 __io_poll_execute(req, mask, poll->events);
 452                 return 0;
 453         }
 454
 455         /*
 456          * Release ownership. If someone tried to queue a tw while it was
 457          * locked, kick it off for them.
 458          */
 459         v = atomic_dec_return(&req->poll_refs);
 460         if (unlikely(v & IO_POLL_REF_MASK))
 461                 __io_poll_execute(req, 0, poll->events);
 462         return 0;
 463 }
 464
 465 static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
 466                                struct poll_table_struct *p)
 467 {
 468         struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
 469         struct async_poll *apoll = pt->req->apoll;
 470
 471         __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
 472 }
 473
 474 int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
 475 {
 476         const struct io_op_def *def = &io_op_defs[req->opcode];
 477         struct io_ring_ctx *ctx = req->ctx;
 478         struct async_poll *apoll;
 479         struct io_poll_table ipt;
 480         __poll_t mask = POLLPRI | POLLERR | EPOLLET;
 481         int ret;
 482
 483         if (!def->pollin && !def->pollout)
 484                 return IO_APOLL_ABORTED;
 485         if (!file_can_poll(req->file))
 486                 return IO_APOLL_ABORTED;
 487         if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
 488                 return IO_APOLL_ABORTED;
 489         if (!(req->flags & REQ_F_APOLL_MULTISHOT))
 490                 mask |= EPOLLONESHOT;
 491
 492         if (def->pollin) {
 493                 mask |= EPOLLIN | EPOLLRDNORM;
 494
 495                 /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
 496                 if (req->flags & REQ_F_CLEAR_POLLIN)
 497                         mask &= ~EPOLLIN;
 498         } else {
 499                 mask |= EPOLLOUT | EPOLLWRNORM;
 500         }
 501         if (def->poll_exclusive)
 502                 mask |= EPOLLEXCLUSIVE;
 503         if (req->flags & REQ_F_POLLED) {
 504                 apoll = req->apoll;
 505                 kfree(apoll->double_poll);
 506         } else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
 507                    !list_empty(&ctx->apoll_cache)) {
 508                 apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
 509                                                 poll.wait.entry);
 510                 list_del_init(&apoll->poll.wait.entry);
 511         } else {
 512                 apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
 513                 if (unlikely(!apoll))
 514                         return IO_APOLL_ABORTED;
 515         }
 516         apoll->double_poll = NULL;
 517         req->apoll = apoll;
 518         req->flags |= REQ_F_POLLED;
 519         ipt.pt._qproc = io_async_queue_proc;
 520
 521         io_kbuf_recycle(req, issue_flags);
 522
 523         ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask);
 524         if (ret || ipt.error)
 525                 return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
 526
 527         trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode,
 528                                 mask, apoll->poll.events);
 529         return IO_APOLL_OK;
 530 }
 531
 532 /*
 533  * Returns true if we found and killed one or more poll requests
 534  */
 535 __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
 536                                bool cancel_all)
 537 {
 538         struct hlist_node *tmp;
 539         struct io_kiocb *req;
 540         bool found = false;
 541         int i;
 542
 543         spin_lock(&ctx->completion_lock);
 544         for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
 545                 struct hlist_head *list;
 546
 547                 list = &ctx->cancel_hash[i];
 548                 hlist_for_each_entry_safe(req, tmp, list, hash_node) {
 549                         if (io_match_task_safe(req, tsk, cancel_all)) {
 550                                 hlist_del_init(&req->hash_node);
 551                                 io_poll_cancel_req(req);
 552                                 found = true;
 553                         }
 554                 }
 555         }
 556         spin_unlock(&ctx->completion_lock);
 557         return found;
 558 }
 559
 560 static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
 561                                      struct io_cancel_data *cd)
 562         __must_hold(&ctx->completion_lock)
 563 {
 564         struct hlist_head *list;
 565         struct io_kiocb *req;
 566
 567         list = &ctx->cancel_hash[hash_long(cd->data, ctx->cancel_hash_bits)];
 568         hlist_for_each_entry(req, list, hash_node) {
 569                 if (cd->data != req->cqe.user_data)
 570                         continue;
 571                 if (poll_only && req->opcode != IORING_OP_POLL_ADD)
 572                         continue;
 573                 if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
 574                         if (cd->seq == req->work.cancel_seq)
 575                                 continue;
 576                         req->work.cancel_seq = cd->seq;
 577                 }
 578                 return req;
 579         }
 580         return NULL;
 581 }
 582
 583 static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
 584                                           struct io_cancel_data *cd)
 585         __must_hold(&ctx->completion_lock)
 586 {
 587         struct io_kiocb *req;
 588         int i;
 589
 590         for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
 591                 struct hlist_head *list;
 592
 593                 list = &ctx->cancel_hash[i];
 594                 hlist_for_each_entry(req, list, hash_node) {
 595                         if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
 596                             req->file != cd->file)
 597                                 continue;
 598                         if (cd->seq == req->work.cancel_seq)
 599                                 continue;
 600                         req->work.cancel_seq = cd->seq;
 601                         return req;
 602                 }
 603         }
 604         return NULL;
 605 }
 606
 607 static bool io_poll_disarm(struct io_kiocb *req)
 608         __must_hold(&ctx->completion_lock)
 609 {
 610         if (!io_poll_get_ownership(req))
 611                 return false;
 612         io_poll_remove_entries(req);
 613         hash_del(&req->hash_node);
 614         return true;
 615 }
 616
 617 int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
 618         __must_hold(&ctx->completion_lock)
 619 {
 620         struct io_kiocb *req;
 621
 622         if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
 623                 req = io_poll_file_find(ctx, cd);
 624         else
 625                 req = io_poll_find(ctx, false, cd);
 626         if (!req)
 627                 return -ENOENT;
 628         io_poll_cancel_req(req);
 629         return 0;
 630 }
 631
 632 static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
 633                                      unsigned int flags)
 634 {
 635         u32 events;
 636
 637         events = READ_ONCE(sqe->poll32_events);
 638 #ifdef __BIG_ENDIAN
 639         events = swahw32(events);
 640 #endif
 641         if (!(flags & IORING_POLL_ADD_MULTI))
 642                 events |= EPOLLONESHOT;
 643         if (!(flags & IORING_POLL_ADD_LEVEL))
 644                 events |= EPOLLET;
 645         return demangle_poll(events) |
 646                 (events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
 647 }
 648
 649 int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 650 {
 651         struct io_poll_update *upd = io_kiocb_to_cmd(req);
 652         u32 flags;
 653
 654         if (sqe->buf_index || sqe->splice_fd_in)
 655                 return -EINVAL;
 656         flags = READ_ONCE(sqe->len);
 657         if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
 658                       IORING_POLL_ADD_MULTI))
 659                 return -EINVAL;
 660         /* meaningless without update */
 661         if (flags == IORING_POLL_ADD_MULTI)
 662                 return -EINVAL;
 663
 664         upd->old_user_data = READ_ONCE(sqe->addr);
 665         upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
 666         upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
 667
 668         upd->new_user_data = READ_ONCE(sqe->off);
 669         if (!upd->update_user_data && upd->new_user_data)
 670                 return -EINVAL;
 671         if (upd->update_events)
 672                 upd->events = io_poll_parse_events(sqe, flags);
 673         else if (sqe->poll32_events)
 674                 return -EINVAL;
 675
 676         return 0;
 677 }
 678
 679 int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 680 {
 681         struct io_poll *poll = io_kiocb_to_cmd(req);
 682         u32 flags;
 683
 684         if (sqe->buf_index || sqe->off || sqe->addr)
 685                 return -EINVAL;
 686         flags = READ_ONCE(sqe->len);
 687         if (flags & ~(IORING_POLL_ADD_MULTI|IORING_POLL_ADD_LEVEL))
 688                 return -EINVAL;
 689         if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
 690                 return -EINVAL;
 691
 692         poll->events = io_poll_parse_events(sqe, flags);
 693         return 0;
 694 }
 695
 696 int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
 697 {
 698         struct io_poll *poll = io_kiocb_to_cmd(req);
 699         struct io_poll_table ipt;
 700         int ret;
 701
 702         ipt.pt._qproc = io_poll_queue_proc;
 703
 704         ret = __io_arm_poll_handler(req, poll, &ipt, poll->events);
 705         if (ret) {
 706                 io_req_set_res(req, ret, 0);
 707                 return IOU_OK;
 708         }
 709         if (ipt.error) {
 710                 req_set_fail(req);
 711                 return ipt.error;
 712         }
 713
 714         return IOU_ISSUE_SKIP_COMPLETE;
 715 }
 716
 717 int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
 718 {
 719         struct io_poll_update *poll_update = io_kiocb_to_cmd(req);
 720         struct io_cancel_data cd = { .data = poll_update->old_user_data, };
 721         struct io_ring_ctx *ctx = req->ctx;
 722         struct io_kiocb *preq;
 723         int ret2, ret = 0;
 724         bool locked;
 725
 726         spin_lock(&ctx->completion_lock);
 727         preq = io_poll_find(ctx, true, &cd);
 728         if (!preq || !io_poll_disarm(preq)) {
 729                 spin_unlock(&ctx->completion_lock);
 730                 ret = preq ? -EALREADY : -ENOENT;
 731                 goto out;
 732         }
 733         spin_unlock(&ctx->completion_lock);
 734
 735         if (poll_update->update_events || poll_update->update_user_data) {
 736                 /* only mask one event flags, keep behavior flags */
 737                 if (poll_update->update_events) {
 738                         struct io_poll *poll = io_kiocb_to_cmd(preq);
 739
 740                         poll->events &= ~0xffff;
 741                         poll->events |= poll_update->events & 0xffff;
 742                         poll->events |= IO_POLL_UNMASK;
 743                 }
 744                 if (poll_update->update_user_data)
 745                         preq->cqe.user_data = poll_update->new_user_data;
 746
 747                 ret2 = io_poll_add(preq, issue_flags);
 748                 /* successfully updated, don't complete poll request */
 749                 if (!ret2 || ret2 == -EIOCBQUEUED)
 750                         goto out;
 751         }
 752
 753         req_set_fail(preq);
 754         io_req_set_res(preq, -ECANCELED, 0);
 755         locked = !(issue_flags & IO_URING_F_UNLOCKED);
 756         io_req_task_complete(preq, &locked);
 757 out:
 758         if (ret < 0) {
 759                 req_set_fail(req);
 760                 return ret;
 761         }
 762         /* complete update request, we're done with it */
 763         io_req_set_res(req, ret, 0);
 764         return IOU_OK;
 765 }