io_uring/poll.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 #include <linux/kernel.h>
   3 #include <linux/errno.h>
   4 #include <linux/fs.h>
   5 #include <linux/file.h>
   6 #include <linux/mm.h>
   7 #include <linux/slab.h>
   8 #include <linux/poll.h>
   9 #include <linux/hashtable.h>
  10 #include <linux/io_uring.h>
  11
  12 #include <trace/events/io_uring.h>
  13
  14 #include <uapi/linux/io_uring.h>
  15
  16 #include "io_uring_types.h"
  17 #include "io_uring.h"
  18 #include "refs.h"
  19 #include "opdef.h"
  20 #include "kbuf.h"
  21 #include "poll.h"
  22
  23 struct io_poll_update {
             /*
              * Command data of a poll remove/update request: fetched with
              * io_kiocb_to_cmd(), filled in by io_poll_remove_prep() and consumed
              * by io_poll_remove().
              */
             /* NOTE(review): never written in this file - presumably set by core code, confirm */
  24         struct file                     *file;
             /* sqe->addr: user_data of the poll request to look up */
  25         u64                             old_user_data;
             /* sqe->off: replacement user_data, must be 0 unless update_user_data */
  26         u64                             new_user_data;
             /* parsed sqe->poll32_events, only written when update_events is set */
  27         __poll_t                        events;
             /* IORING_POLL_UPDATE_EVENTS was passed in sqe->len */
  28         bool                            update_events;
             /* IORING_POLL_UPDATE_USER_DATA was passed in sqe->len */
  29         bool                            update_user_data;
  30 };
  31
  32 struct io_poll_table {
             /*
              * Context handed to vfs_poll() while arming: the queue callbacks get
              * a pointer to ->pt and recover this structure with container_of().
              */
  33         struct poll_table_struct pt;
             /* request being armed, set by __io_arm_poll_handler() */
  34         struct io_kiocb *req;
             /* number of wait entries __io_queue_proc() has added so far */
  35         int nr_entries;
             /* 0, or the negative errno recorded while arming */
  36         int error;
  37 };
  38
  39 #define IO_POLL_CANCEL_FLAG     BIT(31)
     /*
      * Layout of ->poll_refs: bit 31 (above) is set by io_poll_mark_cancelled()
      * to request cancellation, bits 30..0 (below) count ownership references.
      */
  40 #define IO_POLL_REF_MASK        GENMASK(30, 0)
  41
  42 /*
  43  * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
  44  * bump it and acquire ownership. It's disallowed to modify requests while not
  45  * owning it, that prevents from races for enqueueing task_work's and b/w
  46  * arming poll and wakeups.
  47  */
  48 static inline bool io_poll_get_ownership(struct io_kiocb *req)
  49 {
  50         return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
  51 }
  52
  53 static void io_poll_mark_cancelled(struct io_kiocb *req)
  54 {
             /*
              * Atomically set IO_POLL_CANCEL_FLAG in ->poll_refs. The flag sits
              * outside IO_POLL_REF_MASK, so the reference count is left alone;
              * io_poll_check_events() returns -ECANCELED once it sees the flag.
              */
  55         atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
  56 }
  57
  58 static struct io_poll *io_poll_get_double(struct io_kiocb *req)
  59 {
  60         /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
  61         if (req->opcode == IORING_OP_POLL_ADD)
  62                 return req->async_data;
  63         return req->apoll->double_poll;
  64 }
  65
  66 static struct io_poll *io_poll_get_single(struct io_kiocb *req)
  67 {
  68         if (req->opcode == IORING_OP_POLL_ADD)
  69                 return io_kiocb_to_cmd(req);
  70         return &req->apoll->poll;
  71 }
  72
  73 static void io_poll_req_insert(struct io_kiocb *req)
  74 {
  75         struct io_ring_ctx *ctx = req->ctx;
  76         struct hlist_head *list;
  77
  78         list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)];
  79         hlist_add_head(&req->hash_node, list);
  80 }
  81
  82 static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
  83                               wait_queue_func_t wake_func)
  84 {
             /*
              * Prepare @poll for arming: no waitqueue attached yet, @events plus
              * the IO_POLL_UNMASK bits as the interest mask, and @wake_func as
              * the wait entry's wakeup callback.
              */
  85         poll->head = NULL;
  86 #define IO_POLL_UNMASK  (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
  87         /* mask in events that we always want/need */
  88         poll->events = events | IO_POLL_UNMASK;
  89         INIT_LIST_HEAD(&poll->wait.entry);
  90         init_waitqueue_func_entry(&poll->wait, wake_func);
  91 }
  92
  93 static inline void io_poll_remove_entry(struct io_poll *poll)
  94 {
             /*
              * Detach @poll from its waitqueue if it is still on one. ->head is
              * read with acquire semantics, matching the smp_store_release() in
              * io_poll_wake()'s POLLFREE path; a NULL head means there is nothing
              * to unhook and no lock to take.
              */
  95         struct wait_queue_head *head = smp_load_acquire(&poll->head);
  96
  97         if (head) {
  98                 spin_lock_irq(&head->lock);
  99                 list_del_init(&poll->wait.entry);
 100                 poll->head = NULL;
 101                 spin_unlock_irq(&head->lock);
 102         }
 103 }
 104
 105 static void io_poll_remove_entries(struct io_kiocb *req)
 106 {
             /* Unhook the request's single and double wait entries, where armed. */
 107         /*
 108          * Nothing to do if neither REQ_F_SINGLE_POLL nor REQ_F_DOUBLE_POLL
 109          * is set. Avoid dipping into the poll/apoll/double cachelines if we can.
 110          */
 111         if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
 112                 return;
 113
 114         /*
 115          * While we hold the waitqueue lock and the waitqueue is nonempty,
 116          * wake_up_pollfree() will wait for us.  However, taking the waitqueue
 117          * lock in the first place can race with the waitqueue being freed.
 118          *
 119          * We solve this as eventpoll does: by taking advantage of the fact that
 120          * all users of wake_up_pollfree() will RCU-delay the actual free.  If
 121          * we enter rcu_read_lock() and see that the pointer to the queue is
 122          * non-NULL, we can then lock it without the memory being freed out from
 123          * under us.
 124          *
 125          * Keep holding rcu_read_lock() as long as we hold the queue lock, in
 126          * case the caller deletes the entry from the queue, leaving it empty.
 127          * In that case, only RCU prevents the queue memory from being freed.
 128          */
 129         rcu_read_lock();
 130         if (req->flags & REQ_F_SINGLE_POLL)
 131                 io_poll_remove_entry(io_poll_get_single(req));
 132         if (req->flags & REQ_F_DOUBLE_POLL)
 133                 io_poll_remove_entry(io_poll_get_double(req));
 134         rcu_read_unlock();
 135 }
 136
 137 /*
 138  * All poll tw should go through this. Checks for poll events, manages
 139  * references, does rewait, etc.
 140  *
 141  * Returns a negative error on failure. >0 when no action require, which is
 142  * either spurious wakeup or multishot CQE is served. 0 when it's done with
 143  * the request, then the mask is stored in req->cqe.res.
 144  */
 145 static int io_poll_check_events(struct io_kiocb *req, bool *locked)
 146 {
 147         struct io_ring_ctx *ctx = req->ctx;
 148         int v, ret;
 149
 150         /* req->task == current here, checking PF_EXITING is safe */
 151         if (unlikely(req->task->flags & PF_EXITING))
 152                 return -ECANCELED;
 153
 154         do {
 155                 v = atomic_read(&req->poll_refs);
 156
 157                 /* tw handler should be the owner, and so have some references */
 158                 if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
 159                         return 0;
 160                 if (v & IO_POLL_CANCEL_FLAG)
 161                         return -ECANCELED;
 162
 163                 if (!req->cqe.res) {
 164                         struct poll_table_struct pt = { ._key = req->apoll_events };
 165                         req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
 166                 }
 167
 168                 if ((unlikely(!req->cqe.res)))
 169                         continue;
 170                 if (req->apoll_events & EPOLLONESHOT)
 171                         return 0;
 172
 173                 /* multishot, just fill a CQE and proceed */
 174                 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
 175                         __poll_t mask = mangle_poll(req->cqe.res &
 176                                                     req->apoll_events);
 177                         bool filled;
 178
 179                         spin_lock(&ctx->completion_lock);
 180                         filled = io_fill_cqe_aux(ctx, req->cqe.user_data,
 181                                                  mask, IORING_CQE_F_MORE);
 182                         io_commit_cqring(ctx);
 183                         spin_unlock(&ctx->completion_lock);
 184                         if (filled) {
 185                                 io_cqring_ev_posted(ctx);
 186                                 continue;
 187                         }
 188                         return -ECANCELED;
 189                 }
 190
 191                 ret = io_poll_issue(req, locked);
 192                 if (ret)
 193                         return ret;
 194
 195                 /*
 196                  * Release all references, retry if someone tried to restart
 197                  * task_work while we were executing it.
 198                  */
 199         } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));
 200
 201         return 1;
 202 }
 203
 204 static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 205 {
             /*
              * task_work handler that __io_poll_execute() installs for
              * IORING_OP_POLL_ADD requests. Nothing is completed while
              * io_poll_check_events() returns > 0. Otherwise the request is
              * unhooked from its waitqueues and the cancel hash and completed
              * with __io_req_complete_post() under ->completion_lock.
              */
 206         struct io_ring_ctx *ctx = req->ctx;
 207         int ret;
 208
 209         ret = io_poll_check_events(req, locked);
 210         if (ret > 0)
 211                 return;
 212
 213         if (!ret) {
 214                 struct io_poll *poll = io_kiocb_to_cmd(req);
 215
                     /* done: report only the ready events this poll asked for */
 216                 req->cqe.res = mangle_poll(req->cqe.res & poll->events);
 217         } else {
                     /* negative error: it becomes the result of a failed request */
 218                 req->cqe.res = ret;
 219                 req_set_fail(req);
 220         }
 221
 222         io_poll_remove_entries(req);
 223         spin_lock(&ctx->completion_lock);
 224         hash_del(&req->hash_node);
 225         req->cqe.flags = 0;
 226         __io_req_complete_post(req);
 227         io_commit_cqring(ctx);
 228         spin_unlock(&ctx->completion_lock);
 229         io_cqring_ev_posted(ctx);
 230 }
 231
 232 static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
 233 {
             /*
              * task_work handler that __io_poll_execute() installs for every
              * opcode other than IORING_OP_POLL_ADD, i.e. poll armed on behalf
              * of another request. Nothing happens while io_poll_check_events()
              * returns > 0. Otherwise the poll state is torn down and the
              * request is either resubmitted (0) or failed with the error.
              */
 234         struct io_ring_ctx *ctx = req->ctx;
 235         int ret;
 236
 237         ret = io_poll_check_events(req, locked);
 238         if (ret > 0)
 239                 return;
 240
 241         io_poll_remove_entries(req);
 242         spin_lock(&ctx->completion_lock);
 243         hash_del(&req->hash_node);
 244         spin_unlock(&ctx->completion_lock);
 245
 246         if (!ret)
 247                 io_req_task_submit(req, locked);
 248         else
 249                 io_req_complete_failed(req, ret);
 250 }
 251
 252 static void __io_poll_execute(struct io_kiocb *req, int mask,
 253                               __poll_t __maybe_unused events)
 254 {
             /*
              * Record @mask with io_req_set_res() and queue the task_work
              * handler matching the opcode. Every caller in this file holds
              * ownership of ->poll_refs at this point. @events is not used.
              */
 255         io_req_set_res(req, mask, 0);
 256         /*
 257          * This is useful for poll that is armed on behalf of another
 258          * request, and where the wakeup path could be on a different
 259          * CPU. We want to avoid pulling in req->apoll->events for that
 260          * case.
 261          */
 262         if (req->opcode == IORING_OP_POLL_ADD)
 263                 req->io_task_work.func = io_poll_task_func;
 264         else
 265                 req->io_task_work.func = io_apoll_task_func;
 266
 267         trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
 268         io_req_task_work_add(req);
 269 }
 270
 271 static inline void io_poll_execute(struct io_kiocb *req, int res,
 272                 __poll_t events)
 273 {
 274         if (io_poll_get_ownership(req))
 275                 __io_poll_execute(req, res, events);
 276 }
 277
 278 static void io_poll_cancel_req(struct io_kiocb *req)
 279 {
             /*
              * Cancel a poll request: set the cancel flag first, so that the
              * task_work kicked below (or the one already running) sees it in
              * io_poll_check_events() and fails the request with -ECANCELED.
              */
 280         io_poll_mark_cancelled(req);
 281         /* kick tw, which should complete the request */
 282         io_poll_execute(req, 0, 0);
 283 }
 284
 285 #define wqe_to_req(wait)        ((void *)((unsigned long) (wait)->private & ~1))
     /*
      * ->private of a wait entry holds the io_kiocb pointer (stored by
      * __io_queue_proc()), with bit 0 set when the entry is the double poll
      * one. wqe_to_req() above masks the tag off, wqe_is_double() below tests it.
      */
 286 #define wqe_is_double(wait)     ((unsigned long) (wait)->private & 1)
     /* bits of poll->events that io_poll_wake() leaves out when matching a wakeup key */
 287 #define IO_ASYNC_POLL_COMMON    (EPOLLONESHOT | EPOLLPRI)
 288
 289 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 290                         void *key)
 291 {
             /*
              * Waitqueue callback that __io_arm_poll_handler() installs on every
              * poll entry. @key carries the event mask of the wakeup. Returns 0
              * when the wakeup does not match the events the entry waits for,
              * 1 otherwise. @mode and @sync are not used.
              */
 292         struct io_kiocb *req = wqe_to_req(wait);
 293         struct io_poll *poll = container_of(wait, struct io_poll, wait);
 294         __poll_t mask = key_to_poll(key);
 295
 296         if (unlikely(mask & POLLFREE)) {
 297                 io_poll_mark_cancelled(req);
 298                 /* we have to kick tw in case it's not already */
 299                 io_poll_execute(req, 0, poll->events);
 300
 301                 /*
 302                  * If the waitqueue is being freed early but someone already
 303                  * holds ownership over the request, we have to tear it down as
 304                  * best we can. That means immediately removing the request from
 305                  * its waitqueue and preventing all further accesses to the
 306                  * waitqueue via the request.
 307                  */
 308                 list_del_init(&poll->wait.entry);
 309
 310                 /*
 311                  * Careful: this *must* be the last step, since as soon
 312                  * as poll->head is NULL'ed out, the request can be
 313                  * completed and freed, since io_poll_remove_entry()
 314                  * will no longer need to take the waitqueue lock.
 315                  */
 316                 smp_store_release(&poll->head, NULL);
 317                 return 1;
 318         }
 319
 320         /* for instances that support it check for an event match first */
 321         if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
 322                 return 0;
 323
             /* if somebody else owns the request, the reference we took is enough */
 324         if (io_poll_get_ownership(req)) {
 325                 /* optional, saves extra locking for removal in tw handler */
 326                 if (mask && poll->events & EPOLLONESHOT) {
 327                         list_del_init(&poll->wait.entry);
 328                         poll->head = NULL;
 329                         if (wqe_is_double(wait))
 330                                 req->flags &= ~REQ_F_DOUBLE_POLL;
 331                         else
 332                                 req->flags &= ~REQ_F_SINGLE_POLL;
 333                 }
 334                 __io_poll_execute(req, mask, poll->events);
 335         }
 336         return 1;
 337 }
 338
 339 static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
 340                             struct wait_queue_head *head,
 341                             struct io_poll **poll_ptr)
 342 {
             /*
              * Hook pt->req onto waitqueue @head. The first waitqueue uses
              * @poll itself. If the file queues on a second, different
              * waitqueue, an extra io_poll is allocated and stored through
              * @poll_ptr. A third distinct waitqueue is refused. Failures are
              * reported through pt->error (-ENOMEM or -EINVAL), not returned.
              */
 343         struct io_kiocb *req = pt->req;
 344         unsigned long wqe_private = (unsigned long) req;
 345
 346         /*
 347          * The file being polled uses multiple waitqueues for poll handling
 348          * (e.g. one for read, one for write). Setup a separate io_poll
 349          * if this happens.
 350          */
 351         if (unlikely(pt->nr_entries)) {
 352                 struct io_poll *first = poll;
 353
 354                 /* double add on the same waitqueue head, ignore */
 355                 if (first->head == head)
 356                         return;
 357                 /* already have a 2nd entry, fail a third attempt */
 358                 if (*poll_ptr) {
 359                         if ((*poll_ptr)->head == head)
 360                                 return;
 361                         pt->error = -EINVAL;
 362                         return;
 363                 }
 364
 365                 poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
 366                 if (!poll) {
 367                         pt->error = -ENOMEM;
 368                         return;
 369                 }
 370                 /* mark as double wq entry */
 371                 wqe_private |= 1;
 372                 req->flags |= REQ_F_DOUBLE_POLL;
                     /* the second entry waits for the same events with the same callback */
 373                 io_init_poll_iocb(poll, first->events, first->wait.func);
 374                 *poll_ptr = poll;
                     /*
                      * For pure poll @poll_ptr is &req->async_data (see
                      * io_poll_queue_proc()), so flag that it is now in use.
                      */
 375                 if (req->opcode == IORING_OP_POLL_ADD)
 376                         req->flags |= REQ_F_ASYNC_DATA;
 377         }
 378
 379         req->flags |= REQ_F_SINGLE_POLL;
 380         pt->nr_entries++;
 381         poll->head = head;
 382         poll->wait.private = (void *) wqe_private;
 383
 384         if (poll->events & EPOLLEXCLUSIVE)
 385                 add_wait_queue_exclusive(head, &poll->wait);
 386         else
 387                 add_wait_queue(head, &poll->wait);
 388 }
 389
 390 static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 391                                struct poll_table_struct *p)
 392 {
 393         struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
 394         struct io_poll *poll = io_kiocb_to_cmd(pt->req);
 395
 396         __io_queue_proc(poll, pt, head,
 397                         (struct io_poll **) &pt->req->async_data);
 398 }
 399
 400 static int __io_arm_poll_handler(struct io_kiocb *req,
 401                                  struct io_poll *poll,
 402                                  struct io_poll_table *ipt, __poll_t mask)
 403 {
             /*
              * Arm @poll for @req on req->file, waiting for @mask. The caller
              * must have set ipt->pt._qproc; the rest of @ipt is set up here.
              *
              * Returns the ready event mask if a oneshot poll already has events
              * pending. Nothing is left armed in that case and the caller deals
              * with the result. Returns 0 in every other case: either arming
              * failed and ipt->error holds the errno, or the request was added
              * to the cancel hash and is handled from wakeups/task_work.
              */
 404         struct io_ring_ctx *ctx = req->ctx;
 405         int v;
 406
 407         INIT_HLIST_NODE(&req->hash_node);
 408         req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
 409         io_init_poll_iocb(poll, mask, io_poll_wake);
 410         poll->file = req->file;
 411
 412         req->apoll_events = poll->events;
 413
 414         ipt->pt._key = mask;
 415         ipt->req = req;
 416         ipt->error = 0;
 417         ipt->nr_entries = 0;
 418
 419         /*
 420          * Take the ownership to delay any tw execution up until we're done
 421          * with poll arming. see io_poll_get_ownership().
 422          */
 423         atomic_set(&req->poll_refs, 1);
 424         mask = vfs_poll(req->file, &ipt->pt) & poll->events;
 425
 426         if (mask && (poll->events & EPOLLONESHOT)) {
 427                 io_poll_remove_entries(req);
 428                 /* no one else has access to the req, forget about the ref */
 429                 return mask;
 430         }
             /* nothing ready, and arming failed or no wait entry was queued */
 431         if (!mask && unlikely(ipt->error || !ipt->nr_entries)) {
 432                 io_poll_remove_entries(req);
 433                 if (!ipt->error)
 434                         ipt->error = -EINVAL;
 435                 return 0;
 436         }
 437
 438         spin_lock(&ctx->completion_lock);
 439         io_poll_req_insert(req);
 440         spin_unlock(&ctx->completion_lock);
 441
 442         if (mask) {
 443                 /* can't multishot if failed, just queue the event we've got */
 444                 if (unlikely(ipt->error || !ipt->nr_entries)) {
 445                         poll->events |= EPOLLONESHOT;
 446                         req->apoll_events |= EPOLLONESHOT;
 447                         ipt->error = 0;
 448                 }
 449                 __io_poll_execute(req, mask, poll->events);
 450                 return 0;
 451         }
 452
 453         /*
 454          * Release ownership. If someone tried to queue a tw while it was
 455          * locked, kick it off for them.
 456          */
 457         v = atomic_dec_return(&req->poll_refs);
 458         if (unlikely(v & IO_POLL_REF_MASK))
 459                 __io_poll_execute(req, 0, poll->events);
 460         return 0;
 461 }
 462
 463 static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
 464                                struct poll_table_struct *p)
 465 {
 466         struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
 467         struct async_poll *apoll = pt->req->apoll;
 468
 469         __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
 470 }
 471
 472 int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
 473 {
             /*
              * Arm poll on behalf of a request of another opcode, using the
              * pollin/pollout/poll_exclusive hints of its io_op_defs[] entry.
              *
              * Returns IO_APOLL_ABORTED if poll cannot or should not be armed
              * (or arming failed), IO_APOLL_READY if the file already has events
              * pending, IO_APOLL_OK once the request is armed.
              */
 474         const struct io_op_def *def = &io_op_defs[req->opcode];
 475         struct io_ring_ctx *ctx = req->ctx;
 476         struct async_poll *apoll;
 477         struct io_poll_table ipt;
 478         __poll_t mask = POLLPRI | POLLERR;
 479         int ret;
 480
 481         if (!def->pollin && !def->pollout)
 482                 return IO_APOLL_ABORTED;
 483         if (!file_can_poll(req->file))
 484                 return IO_APOLL_ABORTED;
             /* REQ_F_POLLED set without REQ_F_PARTIAL_IO: don't arm a second time */
 485         if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
 486                 return IO_APOLL_ABORTED;
 487         if (!(req->flags & REQ_F_APOLL_MULTISHOT))
 488                 mask |= EPOLLONESHOT;
 489
 490         if (def->pollin) {
 491                 mask |= EPOLLIN | EPOLLRDNORM;
 492
 493                 /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
 494                 if (req->flags & REQ_F_CLEAR_POLLIN)
 495                         mask &= ~EPOLLIN;
 496         } else {
 497                 mask |= EPOLLOUT | EPOLLWRNORM;
 498         }
 499         if (def->poll_exclusive)
 500                 mask |= EPOLLEXCLUSIVE;
             /*
              * Pick the async_poll to use: the one the request already has
              * (dropping its old double entry), one from ctx->apoll_cache when
              * IO_URING_F_UNLOCKED is not set, or a fresh allocation.
              */
 501         if (req->flags & REQ_F_POLLED) {
 502                 apoll = req->apoll;
 503                 kfree(apoll->double_poll);
 504         } else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
 505                    !list_empty(&ctx->apoll_cache)) {
 506                 apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
 507                                                 poll.wait.entry);
 508                 list_del_init(&apoll->poll.wait.entry);
 509         } else {
 510                 apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
 511                 if (unlikely(!apoll))
 512                         return IO_APOLL_ABORTED;
 513         }
 514         apoll->double_poll = NULL;
 515         req->apoll = apoll;
 516         req->flags |= REQ_F_POLLED;
 517         ipt.pt._qproc = io_async_queue_proc;
 518
 519         io_kbuf_recycle(req, issue_flags);
 520
 521         ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask);
             /* a non-zero ret is a ready mask; otherwise ipt.error says arming failed */
 522         if (ret || ipt.error)
 523                 return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
 524
 525         trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode,
 526                                 mask, apoll->poll.events);
 527         return IO_APOLL_OK;
 528 }
 529
 530 /*
 531  * Walk every bucket of the cancel hash under ->completion_lock and cancel
      * each request that io_match_task_safe() accepts for @tsk / @cancel_all.
      *
      * Returns true if we found and killed one or more poll requests
 532  */
 533 __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
 534                                bool cancel_all)
 535 {
 536         struct hlist_node *tmp;
 537         struct io_kiocb *req;
 538         bool found = false;
 539         int i;
 540
 541         spin_lock(&ctx->completion_lock);
 542         for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
 543                 struct hlist_head *list;
 544
 545                 list = &ctx->cancel_hash[i];
                     /* _safe variant: matching entries are unhashed while iterating */
 546                 hlist_for_each_entry_safe(req, tmp, list, hash_node) {
 547                         if (io_match_task_safe(req, tsk, cancel_all)) {
 548                                 hlist_del_init(&req->hash_node);
 549                                 io_poll_cancel_req(req);
 550                                 found = true;
 551                         }
 552                 }
 553         }
 554         spin_unlock(&ctx->completion_lock);
 555         return found;
 556 }
 557
 558 static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
 559                                      struct io_cancel_data *cd)
 560         __must_hold(&ctx->completion_lock)
 561 {
 562         struct hlist_head *list;
 563         struct io_kiocb *req;
 564
 565         list = &ctx->cancel_hash[hash_long(cd->data, ctx->cancel_hash_bits)];
 566         hlist_for_each_entry(req, list, hash_node) {
 567                 if (cd->data != req->cqe.user_data)
 568                         continue;
 569                 if (poll_only && req->opcode != IORING_OP_POLL_ADD)
 570                         continue;
 571                 if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
 572                         if (cd->seq == req->work.cancel_seq)
 573                                 continue;
 574                         req->work.cancel_seq = cd->seq;
 575                 }
 576                 return req;
 577         }
 578         return NULL;
 579 }
 580
 581 static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
 582                                           struct io_cancel_data *cd)
 583         __must_hold(&ctx->completion_lock)
 584 {
 585         struct io_kiocb *req;
 586         int i;
 587
 588         for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
 589                 struct hlist_head *list;
 590
 591                 list = &ctx->cancel_hash[i];
 592                 hlist_for_each_entry(req, list, hash_node) {
 593                         if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
 594                             req->file != cd->file)
 595                                 continue;
 596                         if (cd->seq == req->work.cancel_seq)
 597                                 continue;
 598                         req->work.cancel_seq = cd->seq;
 599                         return req;
 600                 }
 601         }
 602         return NULL;
 603 }
 604
 605 static bool io_poll_disarm(struct io_kiocb *req)
 606         __must_hold(&ctx->completion_lock)
 607 {
 608         if (!io_poll_get_ownership(req))
 609                 return false;
 610         io_poll_remove_entries(req);
 611         hash_del(&req->hash_node);
 612         return true;
 613 }
 614
 615 int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
 616         __must_hold(&ctx->completion_lock)
 617 {
 618         struct io_kiocb *req;
 619
 620         if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
 621                 req = io_poll_file_find(ctx, cd);
 622         else
 623                 req = io_poll_find(ctx, false, cd);
 624         if (!req)
 625                 return -ENOENT;
 626         io_poll_cancel_req(req);
 627         return 0;
 628 }
 629
 630 static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
 631                                      unsigned int flags)
 632 {
 633         u32 events;
 634
 635         events = READ_ONCE(sqe->poll32_events);
 636 #ifdef __BIG_ENDIAN
 637         events = swahw32(events);
 638 #endif
 639         if (!(flags & IORING_POLL_ADD_MULTI))
 640                 events |= EPOLLONESHOT;
 641         return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
 642 }
 643
 644 int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 645 {
 646         struct io_poll_update *upd = io_kiocb_to_cmd(req);
 647         u32 flags;
 648
 649         if (sqe->buf_index || sqe->splice_fd_in)
 650                 return -EINVAL;
 651         flags = READ_ONCE(sqe->len);
 652         if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
 653                       IORING_POLL_ADD_MULTI))
 654                 return -EINVAL;
 655         /* meaningless without update */
 656         if (flags == IORING_POLL_ADD_MULTI)
 657                 return -EINVAL;
 658
 659         upd->old_user_data = READ_ONCE(sqe->addr);
 660         upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
 661         upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
 662
 663         upd->new_user_data = READ_ONCE(sqe->off);
 664         if (!upd->update_user_data && upd->new_user_data)
 665                 return -EINVAL;
 666         if (upd->update_events)
 667                 upd->events = io_poll_parse_events(sqe, flags);
 668         else if (sqe->poll32_events)
 669                 return -EINVAL;
 670
 671         return 0;
 672 }
 673
 674 int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 675 {
 676         struct io_poll *poll = io_kiocb_to_cmd(req);
 677         u32 flags;
 678
 679         if (sqe->buf_index || sqe->off || sqe->addr)
 680                 return -EINVAL;
 681         flags = READ_ONCE(sqe->len);
 682         if (flags & ~IORING_POLL_ADD_MULTI)
 683                 return -EINVAL;
 684         if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
 685                 return -EINVAL;
 686
 687         io_req_set_refcount(req);
 688         poll->events = io_poll_parse_events(sqe, flags);
 689         return 0;
 690 }
 691
 692 int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
 693 {
 694         struct io_poll *poll = io_kiocb_to_cmd(req);
 695         struct io_poll_table ipt;
 696         int ret;
 697
 698         ipt.pt._qproc = io_poll_queue_proc;
 699
 700         ret = __io_arm_poll_handler(req, poll, &ipt, poll->events);
 701         if (ret) {
 702                 io_req_set_res(req, ret, 0);
 703                 return IOU_OK;
 704         }
 705         if (ipt.error) {
 706                 req_set_fail(req);
 707                 return ipt.error;
 708         }
 709
 710         return IOU_ISSUE_SKIP_COMPLETE;
 711 }
 712
 713 int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
 714 {
 715         struct io_poll_update *poll_update = io_kiocb_to_cmd(req);
 716         struct io_cancel_data cd = { .data = poll_update->old_user_data, };
 717         struct io_ring_ctx *ctx = req->ctx;
 718         struct io_kiocb *preq;
 719         int ret2, ret = 0;
 720         bool locked;
 721
 722         spin_lock(&ctx->completion_lock);
 723         preq = io_poll_find(ctx, true, &cd);
 724         if (!preq || !io_poll_disarm(preq)) {
 725                 spin_unlock(&ctx->completion_lock);
 726                 ret = preq ? -EALREADY : -ENOENT;
 727                 goto out;
 728         }
 729         spin_unlock(&ctx->completion_lock);
 730
 731         if (poll_update->update_events || poll_update->update_user_data) {
 732                 /* only mask one event flags, keep behavior flags */
 733                 if (poll_update->update_events) {
 734                         struct io_poll *poll = io_kiocb_to_cmd(preq);
 735
 736                         poll->events &= ~0xffff;
 737                         poll->events |= poll_update->events & 0xffff;
 738                         poll->events |= IO_POLL_UNMASK;
 739                 }
 740                 if (poll_update->update_user_data)
 741                         preq->cqe.user_data = poll_update->new_user_data;
 742
 743                 ret2 = io_poll_add(preq, issue_flags);
 744                 /* successfully updated, don't complete poll request */
 745                 if (!ret2 || ret2 == -EIOCBQUEUED)
 746                         goto out;
 747         }
 748
 749         req_set_fail(preq);
 750         io_req_set_res(preq, -ECANCELED, 0);
 751         locked = !(issue_flags & IO_URING_F_UNLOCKED);
 752         io_req_task_complete(preq, &locked);
 753 out:
 754         if (ret < 0) {
 755                 req_set_fail(req);
 756                 return ret;
 757         }
 758         /* complete update request, we're done with it */
 759         io_req_set_res(req, ret, 0);
 760         return IOU_OK;
 761 }