io_uring: improve task exit timeout cancellations
[linux-block.git] / io_uring / poll.c
CommitLineData
329061d3
JA
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/kernel.h>
3#include <linux/errno.h>
4#include <linux/fs.h>
5#include <linux/file.h>
6#include <linux/mm.h>
7#include <linux/slab.h>
8#include <linux/poll.h>
9#include <linux/hashtable.h>
10#include <linux/io_uring.h>
11
12#include <trace/events/io_uring.h>
13
14#include <uapi/linux/io_uring.h>
15
329061d3
JA
16#include "io_uring.h"
17#include "refs.h"
18#include "opdef.h"
3b77495a 19#include "kbuf.h"
329061d3 20#include "poll.h"
38513c46 21#include "cancel.h"
329061d3
JA
22
/*
 * Command data of a poll remove/update request. It is filled in by
 * io_poll_remove_prep() and consumed by io_poll_remove().
 */
23struct io_poll_update {
24 struct file *file;
/* user_data of the poll request to look up (read from sqe->addr) */
25 u64 old_user_data;
/* replacement user_data (read from sqe->off), used if update_user_data */
26 u64 new_user_data;
/* parsed replacement event mask, only set when update_events is true */
27 __poll_t events;
/* IORING_POLL_UPDATE_EVENTS was set in the request flags */
28 bool update_events;
/* IORING_POLL_UPDATE_USER_DATA was set in the request flags */
29 bool update_user_data;
30};
31
/*
 * Wrapper around poll_table_struct handed to vfs_poll() while arming. The
 * queue callbacks recover it from the embedded ->pt via container_of().
 */
32struct io_poll_table {
33 struct poll_table_struct pt;
/* request being armed */
34 struct io_kiocb *req;
/* number of waitqueue entries added by __io_queue_proc() */
35 int nr_entries;
/* 0, or a negative errno recorded by __io_queue_proc() */
36 int error;
37};
38
/* Bit or'ed into ->poll_refs by io_poll_mark_cancelled(). */
39#define IO_POLL_CANCEL_FLAG BIT(31)
/* The remaining low bits of ->poll_refs hold the reference count. */
40#define IO_POLL_REF_MASK GENMASK(30, 0)
41
42/*
43 * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
44 * bump it and acquire ownership. It's disallowed to modify requests while not
45 * owning it, that prevents from races for enqueueing task_work's and b/w
46 * arming poll and wakeups.
47 */
48static inline bool io_poll_get_ownership(struct io_kiocb *req)
49{
50 return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
51}
52
53static void io_poll_mark_cancelled(struct io_kiocb *req)
54{
55 atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
56}
57
58static struct io_poll *io_poll_get_double(struct io_kiocb *req)
59{
60 /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
61 if (req->opcode == IORING_OP_POLL_ADD)
62 return req->async_data;
63 return req->apoll->double_poll;
64}
65
66static struct io_poll *io_poll_get_single(struct io_kiocb *req)
67{
68 if (req->opcode == IORING_OP_POLL_ADD)
69 return io_kiocb_to_cmd(req);
70 return &req->apoll->poll;
71}
72
73static void io_poll_req_insert(struct io_kiocb *req)
74{
e6f89be6
PB
75 struct io_hash_table *table = &req->ctx->cancel_table;
76 u32 index = hash_long(req->cqe.user_data, table->hash_bits);
77 struct io_hash_bucket *hb = &table->hbs[index];
329061d3 78
38513c46
HX
79 spin_lock(&hb->lock);
80 hlist_add_head(&req->hash_node, &hb->list);
81 spin_unlock(&hb->lock);
82}
83
84static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
85{
e6f89be6
PB
86 struct io_hash_table *table = &req->ctx->cancel_table;
87 u32 index = hash_long(req->cqe.user_data, table->hash_bits);
88 spinlock_t *lock = &table->hbs[index].lock;
38513c46
HX
89
90 spin_lock(lock);
91 hash_del(&req->hash_node);
92 spin_unlock(lock);
329061d3
JA
93}
94
9ca9fb24
PB
95static void io_poll_req_insert_locked(struct io_kiocb *req)
96{
97 struct io_hash_table *table = &req->ctx->cancel_table_locked;
98 u32 index = hash_long(req->cqe.user_data, table->hash_bits);
99
100 hlist_add_head(&req->hash_node, &table->hbs[index].list);
101}
102
103static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
104{
105 struct io_ring_ctx *ctx = req->ctx;
106
107 if (req->flags & REQ_F_HASH_LOCKED) {
108 /*
109 * ->cancel_table_locked is protected by ->uring_lock in
110 * contrast to per bucket spinlocks. Likely, tctx_task_work()
111 * already grabbed the mutex for us, but there is a chance it
112 * failed.
113 */
114 io_tw_lock(ctx, locked);
115 hash_del(&req->hash_node);
116 } else {
117 io_poll_req_delete(req, ctx);
118 }
119}
120
329061d3
JA
121static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
122 wait_queue_func_t wake_func)
123{
124 poll->head = NULL;
125#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
126 /* mask in events that we always want/need */
127 poll->events = events | IO_POLL_UNMASK;
128 INIT_LIST_HEAD(&poll->wait.entry);
129 init_waitqueue_func_entry(&poll->wait, wake_func);
130}
131
132static inline void io_poll_remove_entry(struct io_poll *poll)
133{
134 struct wait_queue_head *head = smp_load_acquire(&poll->head);
135
136 if (head) {
137 spin_lock_irq(&head->lock);
138 list_del_init(&poll->wait.entry);
139 poll->head = NULL;
140 spin_unlock_irq(&head->lock);
141 }
142}
143
144static void io_poll_remove_entries(struct io_kiocb *req)
145{
146 /*
147 * Nothing to do if neither of those flags are set. Avoid dipping
148 * into the poll/apoll/double cachelines if we can.
149 */
150 if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
151 return;
152
153 /*
154 * While we hold the waitqueue lock and the waitqueue is nonempty,
155 * wake_up_pollfree() will wait for us. However, taking the waitqueue
156 * lock in the first place can race with the waitqueue being freed.
157 *
158 * We solve this as eventpoll does: by taking advantage of the fact that
159 * all users of wake_up_pollfree() will RCU-delay the actual free. If
160 * we enter rcu_read_lock() and see that the pointer to the queue is
161 * non-NULL, we can then lock it without the memory being freed out from
162 * under us.
163 *
164 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
165 * case the caller deletes the entry from the queue, leaving it empty.
166 * In that case, only RCU prevents the queue memory from being freed.
167 */
168 rcu_read_lock();
169 if (req->flags & REQ_F_SINGLE_POLL)
170 io_poll_remove_entry(io_poll_get_single(req));
171 if (req->flags & REQ_F_DOUBLE_POLL)
172 io_poll_remove_entry(io_poll_get_double(req));
173 rcu_read_unlock();
174}
175
176/*
177 * All poll tw should go through this. Checks for poll events, manages
178 * references, does rewait, etc.
179 *
180 * Returns a negative error on failure. >0 when no action require, which is
181 * either spurious wakeup or multishot CQE is served. 0 when it's done with
182 * the request, then the mask is stored in req->cqe.res.
183 */
184static int io_poll_check_events(struct io_kiocb *req, bool *locked)
185{
186 struct io_ring_ctx *ctx = req->ctx;
187 int v, ret;
188
189 /* req->task == current here, checking PF_EXITING is safe */
190 if (unlikely(req->task->flags & PF_EXITING))
191 return -ECANCELED;
192
193 do {
194 v = atomic_read(&req->poll_refs);
195
196 /* tw handler should be the owner, and so have some references */
197 if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
198 return 0;
199 if (v & IO_POLL_CANCEL_FLAG)
200 return -ECANCELED;
201
202 if (!req->cqe.res) {
203 struct poll_table_struct pt = { ._key = req->apoll_events };
204 req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
205 }
206
207 if ((unlikely(!req->cqe.res)))
208 continue;
209 if (req->apoll_events & EPOLLONESHOT)
210 return 0;
211
212 /* multishot, just fill a CQE and proceed */
213 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
214 __poll_t mask = mangle_poll(req->cqe.res &
215 req->apoll_events);
329061d3 216
d245bca6
PB
217 if (!io_post_aux_cqe(ctx, req->cqe.user_data,
218 mask, IORING_CQE_F_MORE))
219 return -ECANCELED;
220 } else {
221 ret = io_poll_issue(req, locked);
222 if (ret)
223 return ret;
224 }
329061d3
JA
225
226 /*
227 * Release all references, retry if someone tried to restart
228 * task_work while we were executing it.
229 */
230 } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));
231
232 return 1;
233}
234
235static void io_poll_task_func(struct io_kiocb *req, bool *locked)
236{
329061d3
JA
237 int ret;
238
239 ret = io_poll_check_events(req, locked);
240 if (ret > 0)
241 return;
242
243 if (!ret) {
244 struct io_poll *poll = io_kiocb_to_cmd(req);
245
246 req->cqe.res = mangle_poll(req->cqe.res & poll->events);
247 } else {
248 req->cqe.res = ret;
249 req_set_fail(req);
250 }
251
252 io_poll_remove_entries(req);
9ca9fb24
PB
253 io_poll_tw_hash_eject(req, locked);
254
0ec6dca2
PB
255 io_req_set_res(req, req->cqe.res, 0);
256 io_req_task_complete(req, locked);
329061d3
JA
257}
258
259static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
260{
329061d3
JA
261 int ret;
262
263 ret = io_poll_check_events(req, locked);
264 if (ret > 0)
265 return;
266
267 io_poll_remove_entries(req);
9ca9fb24 268 io_poll_tw_hash_eject(req, locked);
329061d3
JA
269
270 if (!ret)
271 io_req_task_submit(req, locked);
272 else
273 io_req_complete_failed(req, ret);
274}
275
276static void __io_poll_execute(struct io_kiocb *req, int mask,
277 __poll_t __maybe_unused events)
278{
279 io_req_set_res(req, mask, 0);
280 /*
281 * This is useful for poll that is armed on behalf of another
282 * request, and where the wakeup path could be on a different
283 * CPU. We want to avoid pulling in req->apoll->events for that
284 * case.
285 */
286 if (req->opcode == IORING_OP_POLL_ADD)
287 req->io_task_work.func = io_poll_task_func;
288 else
289 req->io_task_work.func = io_apoll_task_func;
290
48863ffd 291 trace_io_uring_task_add(req, mask);
329061d3
JA
292 io_req_task_work_add(req);
293}
294
295static inline void io_poll_execute(struct io_kiocb *req, int res,
296 __poll_t events)
297{
298 if (io_poll_get_ownership(req))
299 __io_poll_execute(req, res, events);
300}
301
/* Mark the request cancelled and kick task_work, which should complete it. */
static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	io_poll_execute(req, 0, 0);
}
308
/*
 * ->private of a wait entry carries the request pointer, with bit 0 set by
 * __io_queue_proc() to tag the second (double) entry.
 */
/* strip the tag bit to recover the request pointer */
309#define wqe_to_req(wait) ((void *)((unsigned long) (wait)->private & ~1))
/* non-zero if the wait entry is the double one */
310#define wqe_is_double(wait) ((unsigned long) (wait)->private & 1)
/* bits ignored by io_poll_wake() when matching a wakeup against ->events */
311#define IO_ASYNC_POLL_COMMON (EPOLLONESHOT | EPOLLPRI)
312
313static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
314 void *key)
315{
316 struct io_kiocb *req = wqe_to_req(wait);
317 struct io_poll *poll = container_of(wait, struct io_poll, wait);
318 __poll_t mask = key_to_poll(key);
319
320 if (unlikely(mask & POLLFREE)) {
321 io_poll_mark_cancelled(req);
322 /* we have to kick tw in case it's not already */
323 io_poll_execute(req, 0, poll->events);
324
325 /*
326 * If the waitqueue is being freed early but someone is already
327 * holds ownership over it, we have to tear down the request as
328 * best we can. That means immediately removing the request from
329 * its waitqueue and preventing all further accesses to the
330 * waitqueue via the request.
331 */
332 list_del_init(&poll->wait.entry);
333
334 /*
335 * Careful: this *must* be the last step, since as soon
336 * as req->head is NULL'ed out, the request can be
337 * completed and freed, since aio_poll_complete_work()
338 * will no longer need to take the waitqueue lock.
339 */
340 smp_store_release(&poll->head, NULL);
341 return 1;
342 }
343
344 /* for instances that support it check for an event match first */
345 if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
346 return 0;
347
348 if (io_poll_get_ownership(req)) {
349 /* optional, saves extra locking for removal in tw handler */
350 if (mask && poll->events & EPOLLONESHOT) {
351 list_del_init(&poll->wait.entry);
352 poll->head = NULL;
353 if (wqe_is_double(wait))
354 req->flags &= ~REQ_F_DOUBLE_POLL;
355 else
356 req->flags &= ~REQ_F_SINGLE_POLL;
357 }
358 __io_poll_execute(req, mask, poll->events);
359 }
360 return 1;
361}
362
363static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
364 struct wait_queue_head *head,
365 struct io_poll **poll_ptr)
366{
367 struct io_kiocb *req = pt->req;
368 unsigned long wqe_private = (unsigned long) req;
369
370 /*
371 * The file being polled uses multiple waitqueues for poll handling
372 * (e.g. one for read, one for write). Setup a separate io_poll
373 * if this happens.
374 */
375 if (unlikely(pt->nr_entries)) {
376 struct io_poll *first = poll;
377
378 /* double add on the same waitqueue head, ignore */
379 if (first->head == head)
380 return;
381 /* already have a 2nd entry, fail a third attempt */
382 if (*poll_ptr) {
383 if ((*poll_ptr)->head == head)
384 return;
385 pt->error = -EINVAL;
386 return;
387 }
388
389 poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
390 if (!poll) {
391 pt->error = -ENOMEM;
392 return;
393 }
394 /* mark as double wq entry */
395 wqe_private |= 1;
396 req->flags |= REQ_F_DOUBLE_POLL;
397 io_init_poll_iocb(poll, first->events, first->wait.func);
398 *poll_ptr = poll;
399 if (req->opcode == IORING_OP_POLL_ADD)
400 req->flags |= REQ_F_ASYNC_DATA;
401 }
402
403 req->flags |= REQ_F_SINGLE_POLL;
404 pt->nr_entries++;
405 poll->head = head;
406 poll->wait.private = (void *) wqe_private;
407
408 if (poll->events & EPOLLEXCLUSIVE)
409 add_wait_queue_exclusive(head, &poll->wait);
410 else
411 add_wait_queue(head, &poll->wait);
412}
413
414static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
415 struct poll_table_struct *p)
416{
417 struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
418 struct io_poll *poll = io_kiocb_to_cmd(pt->req);
419
420 __io_queue_proc(poll, pt, head,
421 (struct io_poll **) &pt->req->async_data);
422}
423
424static int __io_arm_poll_handler(struct io_kiocb *req,
425 struct io_poll *poll,
426 struct io_poll_table *ipt, __poll_t mask)
427{
428 struct io_ring_ctx *ctx = req->ctx;
429 int v;
430
431 INIT_HLIST_NODE(&req->hash_node);
432 req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
433 io_init_poll_iocb(poll, mask, io_poll_wake);
434 poll->file = req->file;
435
436 req->apoll_events = poll->events;
437
438 ipt->pt._key = mask;
439 ipt->req = req;
440 ipt->error = 0;
441 ipt->nr_entries = 0;
442
443 /*
444 * Take the ownership to delay any tw execution up until we're done
445 * with poll arming. see io_poll_get_ownership().
446 */
447 atomic_set(&req->poll_refs, 1);
448 mask = vfs_poll(req->file, &ipt->pt) & poll->events;
449
b9ba8a44
JA
450 if (mask &&
451 ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
329061d3
JA
452 io_poll_remove_entries(req);
453 /* no one else has access to the req, forget about the ref */
454 return mask;
455 }
b9ba8a44 456
329061d3
JA
457 if (!mask && unlikely(ipt->error || !ipt->nr_entries)) {
458 io_poll_remove_entries(req);
459 if (!ipt->error)
460 ipt->error = -EINVAL;
461 return 0;
462 }
463
9ca9fb24
PB
464 if (req->flags & REQ_F_HASH_LOCKED)
465 io_poll_req_insert_locked(req);
466 else
467 io_poll_req_insert(req);
329061d3 468
b9ba8a44 469 if (mask && (poll->events & EPOLLET)) {
329061d3
JA
470 /* can't multishot if failed, just queue the event we've got */
471 if (unlikely(ipt->error || !ipt->nr_entries)) {
472 poll->events |= EPOLLONESHOT;
473 req->apoll_events |= EPOLLONESHOT;
474 ipt->error = 0;
475 }
476 __io_poll_execute(req, mask, poll->events);
477 return 0;
478 }
479
480 /*
481 * Release ownership. If someone tried to queue a tw while it was
482 * locked, kick it off for them.
483 */
484 v = atomic_dec_return(&req->poll_refs);
485 if (unlikely(v & IO_POLL_REF_MASK))
486 __io_poll_execute(req, 0, poll->events);
487 return 0;
488}
489
490static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
491 struct poll_table_struct *p)
492{
493 struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
494 struct async_poll *apoll = pt->req->apoll;
495
496 __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
497}
498
499int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
500{
501 const struct io_op_def *def = &io_op_defs[req->opcode];
502 struct io_ring_ctx *ctx = req->ctx;
503 struct async_poll *apoll;
504 struct io_poll_table ipt;
b9ba8a44 505 __poll_t mask = POLLPRI | POLLERR | EPOLLET;
329061d3
JA
506 int ret;
507
9ca9fb24
PB
508 /*
509 * apoll requests already grab the mutex to complete in the tw handler,
510 * so removal from the mutex-backed hash is free, use it by default.
511 */
512 if (issue_flags & IO_URING_F_UNLOCKED)
513 req->flags &= ~REQ_F_HASH_LOCKED;
514 else
515 req->flags |= REQ_F_HASH_LOCKED;
516
329061d3
JA
517 if (!def->pollin && !def->pollout)
518 return IO_APOLL_ABORTED;
519 if (!file_can_poll(req->file))
520 return IO_APOLL_ABORTED;
521 if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
522 return IO_APOLL_ABORTED;
523 if (!(req->flags & REQ_F_APOLL_MULTISHOT))
524 mask |= EPOLLONESHOT;
525
526 if (def->pollin) {
527 mask |= EPOLLIN | EPOLLRDNORM;
528
529 /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
530 if (req->flags & REQ_F_CLEAR_POLLIN)
531 mask &= ~EPOLLIN;
532 } else {
533 mask |= EPOLLOUT | EPOLLWRNORM;
534 }
535 if (def->poll_exclusive)
536 mask |= EPOLLEXCLUSIVE;
537 if (req->flags & REQ_F_POLLED) {
538 apoll = req->apoll;
539 kfree(apoll->double_poll);
540 } else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
541 !list_empty(&ctx->apoll_cache)) {
542 apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
543 poll.wait.entry);
544 list_del_init(&apoll->poll.wait.entry);
545 } else {
546 apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
547 if (unlikely(!apoll))
548 return IO_APOLL_ABORTED;
549 }
550 apoll->double_poll = NULL;
551 req->apoll = apoll;
552 req->flags |= REQ_F_POLLED;
553 ipt.pt._qproc = io_async_queue_proc;
554
555 io_kbuf_recycle(req, issue_flags);
556
557 ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask);
558 if (ret || ipt.error)
559 return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
560
48863ffd 561 trace_io_uring_poll_arm(req, mask, apoll->poll.events);
329061d3
JA
562 return IO_APOLL_OK;
563}
564
9ca9fb24
PB
565static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
566 struct io_hash_table *table,
567 bool cancel_all)
329061d3 568{
e6f89be6 569 unsigned nr_buckets = 1U << table->hash_bits;
329061d3
JA
570 struct hlist_node *tmp;
571 struct io_kiocb *req;
572 bool found = false;
573 int i;
574
e6f89be6
PB
575 for (i = 0; i < nr_buckets; i++) {
576 struct io_hash_bucket *hb = &table->hbs[i];
329061d3 577
38513c46
HX
578 spin_lock(&hb->lock);
579 hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
329061d3
JA
580 if (io_match_task_safe(req, tsk, cancel_all)) {
581 hlist_del_init(&req->hash_node);
582 io_poll_cancel_req(req);
583 found = true;
584 }
585 }
38513c46 586 spin_unlock(&hb->lock);
329061d3 587 }
329061d3
JA
588 return found;
589}
590
9ca9fb24
PB
591/*
592 * Returns true if we found and killed one or more poll requests
593 */
594__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
595 bool cancel_all)
596 __must_hold(&ctx->uring_lock)
597{
598 return io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all) |
599 io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
600}
601
329061d3 602static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
1ab1edb0 603 struct io_cancel_data *cd,
e6f89be6 604 struct io_hash_table *table,
1ab1edb0 605 struct io_hash_bucket **out_bucket)
329061d3 606{
329061d3 607 struct io_kiocb *req;
e6f89be6
PB
608 u32 index = hash_long(cd->data, table->hash_bits);
609 struct io_hash_bucket *hb = &table->hbs[index];
329061d3 610
1ab1edb0
PB
611 *out_bucket = NULL;
612
38513c46
HX
613 spin_lock(&hb->lock);
614 hlist_for_each_entry(req, &hb->list, hash_node) {
329061d3
JA
615 if (cd->data != req->cqe.user_data)
616 continue;
617 if (poll_only && req->opcode != IORING_OP_POLL_ADD)
618 continue;
619 if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
620 if (cd->seq == req->work.cancel_seq)
621 continue;
622 req->work.cancel_seq = cd->seq;
623 }
1ab1edb0 624 *out_bucket = hb;
329061d3
JA
625 return req;
626 }
38513c46 627 spin_unlock(&hb->lock);
329061d3
JA
628 return NULL;
629}
630
631static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
1ab1edb0 632 struct io_cancel_data *cd,
e6f89be6 633 struct io_hash_table *table,
1ab1edb0 634 struct io_hash_bucket **out_bucket)
329061d3 635{
e6f89be6 636 unsigned nr_buckets = 1U << table->hash_bits;
329061d3
JA
637 struct io_kiocb *req;
638 int i;
639
1ab1edb0
PB
640 *out_bucket = NULL;
641
e6f89be6
PB
642 for (i = 0; i < nr_buckets; i++) {
643 struct io_hash_bucket *hb = &table->hbs[i];
329061d3 644
38513c46
HX
645 spin_lock(&hb->lock);
646 hlist_for_each_entry(req, &hb->list, hash_node) {
329061d3
JA
647 if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
648 req->file != cd->file)
649 continue;
650 if (cd->seq == req->work.cancel_seq)
651 continue;
652 req->work.cancel_seq = cd->seq;
1ab1edb0 653 *out_bucket = hb;
329061d3
JA
654 return req;
655 }
38513c46 656 spin_unlock(&hb->lock);
329061d3
JA
657 }
658 return NULL;
659}
660
9ca9fb24 661static int io_poll_disarm(struct io_kiocb *req)
329061d3 662{
9ca9fb24
PB
663 if (!req)
664 return -ENOENT;
329061d3 665 if (!io_poll_get_ownership(req))
9ca9fb24 666 return -EALREADY;
329061d3
JA
667 io_poll_remove_entries(req);
668 hash_del(&req->hash_node);
9ca9fb24 669 return 0;
329061d3
JA
670}
671
a2cdd519 672static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
e6f89be6 673 struct io_hash_table *table)
329061d3 674{
1ab1edb0 675 struct io_hash_bucket *bucket;
329061d3
JA
676 struct io_kiocb *req;
677
678 if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
e6f89be6 679 req = io_poll_file_find(ctx, cd, table, &bucket);
329061d3 680 else
e6f89be6 681 req = io_poll_find(ctx, false, cd, table, &bucket);
1ab1edb0
PB
682
683 if (req)
684 io_poll_cancel_req(req);
685 if (bucket)
686 spin_unlock(&bucket->lock);
687 return req ? 0 : -ENOENT;
329061d3
JA
688}
689
5d7943d9
PB
690int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
691 unsigned issue_flags)
a2cdd519 692{
9ca9fb24
PB
693 int ret;
694
695 ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
696 if (ret != -ENOENT)
697 return ret;
698
699 io_ring_submit_lock(ctx, issue_flags);
700 ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked);
701 io_ring_submit_unlock(ctx, issue_flags);
702 return ret;
a2cdd519
PB
703}
704
329061d3
JA
705static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
706 unsigned int flags)
707{
708 u32 events;
709
710 events = READ_ONCE(sqe->poll32_events);
711#ifdef __BIG_ENDIAN
712 events = swahw32(events);
713#endif
714 if (!(flags & IORING_POLL_ADD_MULTI))
715 events |= EPOLLONESHOT;
b9ba8a44
JA
716 if (!(flags & IORING_POLL_ADD_LEVEL))
717 events |= EPOLLET;
718 return demangle_poll(events) |
719 (events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
329061d3
JA
720}
721
722int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
723{
724 struct io_poll_update *upd = io_kiocb_to_cmd(req);
725 u32 flags;
726
727 if (sqe->buf_index || sqe->splice_fd_in)
728 return -EINVAL;
729 flags = READ_ONCE(sqe->len);
730 if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
731 IORING_POLL_ADD_MULTI))
732 return -EINVAL;
733 /* meaningless without update */
734 if (flags == IORING_POLL_ADD_MULTI)
735 return -EINVAL;
736
737 upd->old_user_data = READ_ONCE(sqe->addr);
738 upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
739 upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
740
741 upd->new_user_data = READ_ONCE(sqe->off);
742 if (!upd->update_user_data && upd->new_user_data)
743 return -EINVAL;
744 if (upd->update_events)
745 upd->events = io_poll_parse_events(sqe, flags);
746 else if (sqe->poll32_events)
747 return -EINVAL;
748
749 return 0;
750}
751
752int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
753{
754 struct io_poll *poll = io_kiocb_to_cmd(req);
755 u32 flags;
756
757 if (sqe->buf_index || sqe->off || sqe->addr)
758 return -EINVAL;
759 flags = READ_ONCE(sqe->len);
b9ba8a44 760 if (flags & ~(IORING_POLL_ADD_MULTI|IORING_POLL_ADD_LEVEL))
329061d3
JA
761 return -EINVAL;
762 if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
763 return -EINVAL;
764
329061d3
JA
765 poll->events = io_poll_parse_events(sqe, flags);
766 return 0;
767}
768
769int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
770{
771 struct io_poll *poll = io_kiocb_to_cmd(req);
772 struct io_poll_table ipt;
773 int ret;
774
775 ipt.pt._qproc = io_poll_queue_proc;
776
9ca9fb24
PB
777 /*
778 * If sqpoll or single issuer, there is no contention for ->uring_lock
779 * and we'll end up holding it in tw handlers anyway.
780 */
781 if (!(issue_flags & IO_URING_F_UNLOCKED) &&
782 (req->ctx->flags & (IORING_SETUP_SQPOLL | IORING_SETUP_SINGLE_ISSUER)))
783 req->flags |= REQ_F_HASH_LOCKED;
784 else
785 req->flags &= ~REQ_F_HASH_LOCKED;
786
329061d3
JA
787 ret = __io_arm_poll_handler(req, poll, &ipt, poll->events);
788 if (ret) {
789 io_req_set_res(req, ret, 0);
790 return IOU_OK;
791 }
792 if (ipt.error) {
793 req_set_fail(req);
794 return ipt.error;
795 }
796
797 return IOU_ISSUE_SKIP_COMPLETE;
798}
799
800int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
801{
802 struct io_poll_update *poll_update = io_kiocb_to_cmd(req);
803 struct io_cancel_data cd = { .data = poll_update->old_user_data, };
804 struct io_ring_ctx *ctx = req->ctx;
1ab1edb0 805 struct io_hash_bucket *bucket;
329061d3
JA
806 struct io_kiocb *preq;
807 int ret2, ret = 0;
808 bool locked;
809
e6f89be6 810 preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
9ca9fb24 811 ret2 = io_poll_disarm(preq);
1ab1edb0
PB
812 if (bucket)
813 spin_unlock(&bucket->lock);
9ca9fb24
PB
814 if (!ret2)
815 goto found;
816 if (ret2 != -ENOENT) {
817 ret = ret2;
38513c46
HX
818 goto out;
819 }
9ca9fb24
PB
820
821 io_ring_submit_lock(ctx, issue_flags);
822 preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
823 ret2 = io_poll_disarm(preq);
824 if (bucket)
825 spin_unlock(&bucket->lock);
826 io_ring_submit_unlock(ctx, issue_flags);
827 if (ret2) {
828 ret = ret2;
329061d3
JA
829 goto out;
830 }
329061d3 831
9ca9fb24 832found:
329061d3
JA
833 if (poll_update->update_events || poll_update->update_user_data) {
834 /* only mask one event flags, keep behavior flags */
835 if (poll_update->update_events) {
836 struct io_poll *poll = io_kiocb_to_cmd(preq);
837
838 poll->events &= ~0xffff;
839 poll->events |= poll_update->events & 0xffff;
840 poll->events |= IO_POLL_UNMASK;
841 }
842 if (poll_update->update_user_data)
843 preq->cqe.user_data = poll_update->new_user_data;
844
845 ret2 = io_poll_add(preq, issue_flags);
846 /* successfully updated, don't complete poll request */
847 if (!ret2 || ret2 == -EIOCBQUEUED)
848 goto out;
849 }
850
851 req_set_fail(preq);
852 io_req_set_res(preq, -ECANCELED, 0);
853 locked = !(issue_flags & IO_URING_F_UNLOCKED);
854 io_req_task_complete(preq, &locked);
855out:
856 if (ret < 0) {
857 req_set_fail(req);
858 return ret;
859 }
860 /* complete update request, we're done with it */
861 io_req_set_res(req, ret, 0);
862 return IOU_OK;
863}