io_uring: make io_uring_types.h public
[linux-block.git] / io_uring / poll.c
CommitLineData
329061d3
JA
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/kernel.h>
3#include <linux/errno.h>
4#include <linux/fs.h>
5#include <linux/file.h>
6#include <linux/mm.h>
7#include <linux/slab.h>
8#include <linux/poll.h>
9#include <linux/hashtable.h>
10#include <linux/io_uring.h>
11
12#include <trace/events/io_uring.h>
13
14#include <uapi/linux/io_uring.h>
15
329061d3
JA
16#include "io_uring.h"
17#include "refs.h"
18#include "opdef.h"
3b77495a 19#include "kbuf.h"
329061d3 20#include "poll.h"
38513c46 21#include "cancel.h"
329061d3
JA
22
/*
 * Command payload of a poll remove/update request. It is obtained from the
 * request with io_kiocb_to_cmd() in io_poll_remove_prep() and
 * io_poll_remove().
 */
struct io_poll_update {
	struct file			*file;
	/* user_data of the poll request to look up (read from sqe->addr) */
	u64				old_user_data;
	/* replacement user_data (read from sqe->off) */
	u64				new_user_data;
	/* parsed replacement event mask, only filled when update_events is set */
	__poll_t			events;
	/* IORING_POLL_UPDATE_EVENTS was passed */
	bool				update_events;
	/* IORING_POLL_UPDATE_USER_DATA was passed */
	bool				update_user_data;
};
31
/*
 * Poll table handed to vfs_poll() while arming. The queue callbacks get back
 * to this structure from the embedded poll_table_struct via container_of().
 */
struct io_poll_table {
	struct poll_table_struct	pt;
	/* request being armed */
	struct io_kiocb			*req;
	/* number of wait queue entries added by __io_queue_proc() */
	int				nr_entries;
	/* 0, or the negative errno recorded by __io_queue_proc() */
	int				error;
};
38
/*
 * Layout of ->poll_refs: bit 31 flags a cancelled request, bits 30..0 hold
 * the reference count used for ownership.
 */
#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_REF_MASK	GENMASK(30, 0)
41
42/*
43 * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
44 * bump it and acquire ownership. It's disallowed to modify requests while not
45 * owning it, that prevents from races for enqueueing task_work's and b/w
46 * arming poll and wakeups.
47 */
48static inline bool io_poll_get_ownership(struct io_kiocb *req)
49{
50 return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
51}
52
53static void io_poll_mark_cancelled(struct io_kiocb *req)
54{
55 atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
56}
57
58static struct io_poll *io_poll_get_double(struct io_kiocb *req)
59{
60 /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
61 if (req->opcode == IORING_OP_POLL_ADD)
62 return req->async_data;
63 return req->apoll->double_poll;
64}
65
66static struct io_poll *io_poll_get_single(struct io_kiocb *req)
67{
68 if (req->opcode == IORING_OP_POLL_ADD)
69 return io_kiocb_to_cmd(req);
70 return &req->apoll->poll;
71}
72
73static void io_poll_req_insert(struct io_kiocb *req)
74{
e6f89be6
PB
75 struct io_hash_table *table = &req->ctx->cancel_table;
76 u32 index = hash_long(req->cqe.user_data, table->hash_bits);
77 struct io_hash_bucket *hb = &table->hbs[index];
329061d3 78
38513c46
HX
79 spin_lock(&hb->lock);
80 hlist_add_head(&req->hash_node, &hb->list);
81 spin_unlock(&hb->lock);
82}
83
84static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
85{
e6f89be6
PB
86 struct io_hash_table *table = &req->ctx->cancel_table;
87 u32 index = hash_long(req->cqe.user_data, table->hash_bits);
88 spinlock_t *lock = &table->hbs[index].lock;
38513c46
HX
89
90 spin_lock(lock);
91 hash_del(&req->hash_node);
92 spin_unlock(lock);
329061d3
JA
93}
94
9ca9fb24
PB
95static void io_poll_req_insert_locked(struct io_kiocb *req)
96{
97 struct io_hash_table *table = &req->ctx->cancel_table_locked;
98 u32 index = hash_long(req->cqe.user_data, table->hash_bits);
99
100 hlist_add_head(&req->hash_node, &table->hbs[index].list);
101}
102
103static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
104{
105 struct io_ring_ctx *ctx = req->ctx;
106
107 if (req->flags & REQ_F_HASH_LOCKED) {
108 /*
109 * ->cancel_table_locked is protected by ->uring_lock in
110 * contrast to per bucket spinlocks. Likely, tctx_task_work()
111 * already grabbed the mutex for us, but there is a chance it
112 * failed.
113 */
114 io_tw_lock(ctx, locked);
115 hash_del(&req->hash_node);
116 } else {
117 io_poll_req_delete(req, ctx);
118 }
119}
120
329061d3
JA
121static void io_init_poll_iocb(struct io_poll *poll, __poll_t events,
122 wait_queue_func_t wake_func)
123{
124 poll->head = NULL;
125#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
126 /* mask in events that we always want/need */
127 poll->events = events | IO_POLL_UNMASK;
128 INIT_LIST_HEAD(&poll->wait.entry);
129 init_waitqueue_func_entry(&poll->wait, wake_func);
130}
131
132static inline void io_poll_remove_entry(struct io_poll *poll)
133{
134 struct wait_queue_head *head = smp_load_acquire(&poll->head);
135
136 if (head) {
137 spin_lock_irq(&head->lock);
138 list_del_init(&poll->wait.entry);
139 poll->head = NULL;
140 spin_unlock_irq(&head->lock);
141 }
142}
143
/*
 * Detach every poll entry the request still has queued, as indicated by
 * REQ_F_SINGLE_POLL and REQ_F_DOUBLE_POLL.
 */
static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags is set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}
175
176/*
177 * All poll tw should go through this. Checks for poll events, manages
178 * references, does rewait, etc.
179 *
180 * Returns a negative error on failure. >0 when no action require, which is
181 * either spurious wakeup or multishot CQE is served. 0 when it's done with
182 * the request, then the mask is stored in req->cqe.res.
183 */
184static int io_poll_check_events(struct io_kiocb *req, bool *locked)
185{
186 struct io_ring_ctx *ctx = req->ctx;
187 int v, ret;
188
189 /* req->task == current here, checking PF_EXITING is safe */
190 if (unlikely(req->task->flags & PF_EXITING))
191 return -ECANCELED;
192
193 do {
194 v = atomic_read(&req->poll_refs);
195
196 /* tw handler should be the owner, and so have some references */
197 if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
198 return 0;
199 if (v & IO_POLL_CANCEL_FLAG)
200 return -ECANCELED;
201
202 if (!req->cqe.res) {
203 struct poll_table_struct pt = { ._key = req->apoll_events };
204 req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
205 }
206
207 if ((unlikely(!req->cqe.res)))
208 continue;
209 if (req->apoll_events & EPOLLONESHOT)
210 return 0;
211
212 /* multishot, just fill a CQE and proceed */
213 if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
214 __poll_t mask = mangle_poll(req->cqe.res &
215 req->apoll_events);
329061d3 216
d245bca6
PB
217 if (!io_post_aux_cqe(ctx, req->cqe.user_data,
218 mask, IORING_CQE_F_MORE))
219 return -ECANCELED;
220 } else {
221 ret = io_poll_issue(req, locked);
222 if (ret)
223 return ret;
224 }
329061d3
JA
225
226 /*
227 * Release all references, retry if someone tried to restart
228 * task_work while we were executing it.
229 */
230 } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));
231
232 return 1;
233}
234
235static void io_poll_task_func(struct io_kiocb *req, bool *locked)
236{
329061d3
JA
237 int ret;
238
239 ret = io_poll_check_events(req, locked);
240 if (ret > 0)
241 return;
242
243 if (!ret) {
244 struct io_poll *poll = io_kiocb_to_cmd(req);
245
246 req->cqe.res = mangle_poll(req->cqe.res & poll->events);
247 } else {
248 req->cqe.res = ret;
249 req_set_fail(req);
250 }
251
252 io_poll_remove_entries(req);
9ca9fb24
PB
253 io_poll_tw_hash_eject(req, locked);
254
0ec6dca2
PB
255 io_req_set_res(req, req->cqe.res, 0);
256 io_req_task_complete(req, locked);
329061d3
JA
257}
258
259static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
260{
329061d3
JA
261 int ret;
262
263 ret = io_poll_check_events(req, locked);
264 if (ret > 0)
265 return;
266
267 io_poll_remove_entries(req);
9ca9fb24 268 io_poll_tw_hash_eject(req, locked);
329061d3
JA
269
270 if (!ret)
271 io_req_task_submit(req, locked);
272 else
273 io_req_complete_failed(req, ret);
274}
275
276static void __io_poll_execute(struct io_kiocb *req, int mask,
277 __poll_t __maybe_unused events)
278{
279 io_req_set_res(req, mask, 0);
280 /*
281 * This is useful for poll that is armed on behalf of another
282 * request, and where the wakeup path could be on a different
283 * CPU. We want to avoid pulling in req->apoll->events for that
284 * case.
285 */
286 if (req->opcode == IORING_OP_POLL_ADD)
287 req->io_task_work.func = io_poll_task_func;
288 else
289 req->io_task_work.func = io_apoll_task_func;
290
291 trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
292 io_req_task_work_add(req);
293}
294
295static inline void io_poll_execute(struct io_kiocb *req, int res,
296 __poll_t events)
297{
298 if (io_poll_get_ownership(req))
299 __io_poll_execute(req, res, events);
300}
301
/*
 * Cancel a poll request: flag it as cancelled first, then try to queue
 * task_work so the flag gets noticed.
 */
static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0, 0);
}
308
/*
 * ->private of a wait queue entry holds the request pointer, with bit 0 set
 * when the entry is the second (double) poll entry of that request.
 */
#define wqe_to_req(wait)	((void *)((unsigned long) (wait)->private & ~1))
#define wqe_is_double(wait)	((unsigned long) (wait)->private & 1)
/* bits of ->events that are ignored when matching a wakeup key */
#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)
312
/*
 * Wait queue callback of every poll entry (installed by
 * __io_arm_poll_handler()). Handles POLLFREE, filters wakeups that do not
 * match the requested events, and queues task_work if ownership of the
 * request could be taken.
 */
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE)) {
		io_poll_mark_cancelled(req);
		/* we have to kick tw in case it's not already */
		io_poll_execute(req, 0, poll->events);

		/*
		 * If the waitqueue is being freed early but someone already
		 * holds ownership over the request, we have to tear down the
		 * request as best we can. That means immediately removing the
		 * request from its waitqueue and preventing all further
		 * accesses to the waitqueue via the request.
		 */
		list_del_init(&poll->wait.entry);

		/*
		 * Careful: this *must* be the last step, since as soon
		 * as poll->head is NULL'ed out, the request can be
		 * completed and freed, since io_poll_remove_entry()
		 * will no longer need to take the waitqueue lock.
		 */
		smp_store_release(&poll->head, NULL);
		return 1;
	}

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			/* this entry is gone, io_poll_remove_entries() may skip it */
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask, poll->events);
	}
	return 1;
}
362
/*
 * Common part of the poll table queue callbacks: hook the request onto the
 * wait queue @head. The first call uses @poll itself, a second call on a
 * different head allocates an additional entry and stores it in *@poll_ptr.
 * Failures are reported through pt->error.
 */
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Setup a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}
		/* mark as double wq entry */
		wqe_private |= 1;
		req->flags |= REQ_F_DOUBLE_POLL;
		io_init_poll_iocb(poll, first->events, first->wait.func);
		*poll_ptr = poll;
		/* for pure poll the new entry was just stored in ->async_data */
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
	}

	req->flags |= REQ_F_SINGLE_POLL;
	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE)
		add_wait_queue_exclusive(head, &poll->wait);
	else
		add_wait_queue(head, &poll->wait);
}
413
414static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
415 struct poll_table_struct *p)
416{
417 struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
418 struct io_poll *poll = io_kiocb_to_cmd(pt->req);
419
420 __io_queue_proc(poll, pt, head,
421 (struct io_poll **) &pt->req->async_data);
422}
423
/*
 * Arm @poll for @req with the event mask @mask, using the queue callback
 * already set in @ipt.
 *
 * Returns the ready event mask if events were available right away and the
 * request is both EPOLLET and EPOLLONESHOT; nothing stays queued in that
 * case. Returns 0 otherwise, with ipt->error telling whether arming failed.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask)
{
	struct io_ring_ctx *ctx = req->ctx;
	int v;

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
	io_init_poll_iocb(poll, mask, io_poll_wake);
	poll->file = req->file;

	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;

	/*
	 * Take the ownership to delay any tw execution up until we're done
	 * with poll arming. see io_poll_get_ownership().
	 */
	atomic_set(&req->poll_refs, 1);
	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		io_poll_remove_entries(req);
		/* no one else has access to the req, forget about the ref */
		return mask;
	}

	/* no events and nothing could be queued: report the failure */
	if (!mask && unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);
		if (!ipt->error)
			ipt->error = -EINVAL;
		return 0;
	}

	if (req->flags & REQ_F_HASH_LOCKED)
		io_poll_req_insert_locked(req);
	else
		io_poll_req_insert(req);

	if (mask && (poll->events & EPOLLET)) {
		/* can't multishot if failed, just queue the event we've got */
		if (unlikely(ipt->error || !ipt->nr_entries)) {
			poll->events |= EPOLLONESHOT;
			req->apoll_events |= EPOLLONESHOT;
			ipt->error = 0;
		}
		__io_poll_execute(req, mask, poll->events);
		return 0;
	}

	/*
	 * Release ownership. If someone tried to queue a tw while it was
	 * locked, kick it off for them.
	 */
	v = atomic_dec_return(&req->poll_refs);
	if (unlikely(v & IO_POLL_REF_MASK))
		__io_poll_execute(req, 0, poll->events);
	return 0;
}
489
490static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
491 struct poll_table_struct *p)
492{
493 struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
494 struct async_poll *apoll = pt->req->apoll;
495
496 __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
497}
498
499int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
500{
501 const struct io_op_def *def = &io_op_defs[req->opcode];
502 struct io_ring_ctx *ctx = req->ctx;
503 struct async_poll *apoll;
504 struct io_poll_table ipt;
b9ba8a44 505 __poll_t mask = POLLPRI | POLLERR | EPOLLET;
329061d3
JA
506 int ret;
507
9ca9fb24
PB
508 /*
509 * apoll requests already grab the mutex to complete in the tw handler,
510 * so removal from the mutex-backed hash is free, use it by default.
511 */
512 if (issue_flags & IO_URING_F_UNLOCKED)
513 req->flags &= ~REQ_F_HASH_LOCKED;
514 else
515 req->flags |= REQ_F_HASH_LOCKED;
516
329061d3
JA
517 if (!def->pollin && !def->pollout)
518 return IO_APOLL_ABORTED;
519 if (!file_can_poll(req->file))
520 return IO_APOLL_ABORTED;
521 if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
522 return IO_APOLL_ABORTED;
523 if (!(req->flags & REQ_F_APOLL_MULTISHOT))
524 mask |= EPOLLONESHOT;
525
526 if (def->pollin) {
527 mask |= EPOLLIN | EPOLLRDNORM;
528
529 /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
530 if (req->flags & REQ_F_CLEAR_POLLIN)
531 mask &= ~EPOLLIN;
532 } else {
533 mask |= EPOLLOUT | EPOLLWRNORM;
534 }
535 if (def->poll_exclusive)
536 mask |= EPOLLEXCLUSIVE;
537 if (req->flags & REQ_F_POLLED) {
538 apoll = req->apoll;
539 kfree(apoll->double_poll);
540 } else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
541 !list_empty(&ctx->apoll_cache)) {
542 apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
543 poll.wait.entry);
544 list_del_init(&apoll->poll.wait.entry);
545 } else {
546 apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
547 if (unlikely(!apoll))
548 return IO_APOLL_ABORTED;
549 }
550 apoll->double_poll = NULL;
551 req->apoll = apoll;
552 req->flags |= REQ_F_POLLED;
553 ipt.pt._qproc = io_async_queue_proc;
554
555 io_kbuf_recycle(req, issue_flags);
556
557 ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask);
558 if (ret || ipt.error)
559 return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
560
561 trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode,
562 mask, apoll->poll.events);
563 return IO_APOLL_OK;
564}
565
9ca9fb24
PB
566static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
567 struct io_hash_table *table,
568 bool cancel_all)
329061d3 569{
e6f89be6 570 unsigned nr_buckets = 1U << table->hash_bits;
329061d3
JA
571 struct hlist_node *tmp;
572 struct io_kiocb *req;
573 bool found = false;
574 int i;
575
e6f89be6
PB
576 for (i = 0; i < nr_buckets; i++) {
577 struct io_hash_bucket *hb = &table->hbs[i];
329061d3 578
38513c46
HX
579 spin_lock(&hb->lock);
580 hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
329061d3
JA
581 if (io_match_task_safe(req, tsk, cancel_all)) {
582 hlist_del_init(&req->hash_node);
583 io_poll_cancel_req(req);
584 found = true;
585 }
586 }
38513c46 587 spin_unlock(&hb->lock);
329061d3 588 }
329061d3
JA
589 return found;
590}
591
9ca9fb24
PB
592/*
593 * Returns true if we found and killed one or more poll requests
594 */
595__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
596 bool cancel_all)
597 __must_hold(&ctx->uring_lock)
598{
599 return io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all) |
600 io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
601}
602
329061d3 603static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
1ab1edb0 604 struct io_cancel_data *cd,
e6f89be6 605 struct io_hash_table *table,
1ab1edb0 606 struct io_hash_bucket **out_bucket)
329061d3 607{
329061d3 608 struct io_kiocb *req;
e6f89be6
PB
609 u32 index = hash_long(cd->data, table->hash_bits);
610 struct io_hash_bucket *hb = &table->hbs[index];
329061d3 611
1ab1edb0
PB
612 *out_bucket = NULL;
613
38513c46
HX
614 spin_lock(&hb->lock);
615 hlist_for_each_entry(req, &hb->list, hash_node) {
329061d3
JA
616 if (cd->data != req->cqe.user_data)
617 continue;
618 if (poll_only && req->opcode != IORING_OP_POLL_ADD)
619 continue;
620 if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
621 if (cd->seq == req->work.cancel_seq)
622 continue;
623 req->work.cancel_seq = cd->seq;
624 }
1ab1edb0 625 *out_bucket = hb;
329061d3
JA
626 return req;
627 }
38513c46 628 spin_unlock(&hb->lock);
329061d3
JA
629 return NULL;
630}
631
632static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
1ab1edb0 633 struct io_cancel_data *cd,
e6f89be6 634 struct io_hash_table *table,
1ab1edb0 635 struct io_hash_bucket **out_bucket)
329061d3 636{
e6f89be6 637 unsigned nr_buckets = 1U << table->hash_bits;
329061d3
JA
638 struct io_kiocb *req;
639 int i;
640
1ab1edb0
PB
641 *out_bucket = NULL;
642
e6f89be6
PB
643 for (i = 0; i < nr_buckets; i++) {
644 struct io_hash_bucket *hb = &table->hbs[i];
329061d3 645
38513c46
HX
646 spin_lock(&hb->lock);
647 hlist_for_each_entry(req, &hb->list, hash_node) {
329061d3
JA
648 if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
649 req->file != cd->file)
650 continue;
651 if (cd->seq == req->work.cancel_seq)
652 continue;
653 req->work.cancel_seq = cd->seq;
1ab1edb0 654 *out_bucket = hb;
329061d3
JA
655 return req;
656 }
38513c46 657 spin_unlock(&hb->lock);
329061d3
JA
658 }
659 return NULL;
660}
661
9ca9fb24 662static int io_poll_disarm(struct io_kiocb *req)
329061d3 663{
9ca9fb24
PB
664 if (!req)
665 return -ENOENT;
329061d3 666 if (!io_poll_get_ownership(req))
9ca9fb24 667 return -EALREADY;
329061d3
JA
668 io_poll_remove_entries(req);
669 hash_del(&req->hash_node);
9ca9fb24 670 return 0;
329061d3
JA
671}
672
a2cdd519 673static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
e6f89be6 674 struct io_hash_table *table)
329061d3 675{
1ab1edb0 676 struct io_hash_bucket *bucket;
329061d3
JA
677 struct io_kiocb *req;
678
679 if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
e6f89be6 680 req = io_poll_file_find(ctx, cd, table, &bucket);
329061d3 681 else
e6f89be6 682 req = io_poll_find(ctx, false, cd, table, &bucket);
1ab1edb0
PB
683
684 if (req)
685 io_poll_cancel_req(req);
686 if (bucket)
687 spin_unlock(&bucket->lock);
688 return req ? 0 : -ENOENT;
329061d3
JA
689}
690
5d7943d9
PB
691int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
692 unsigned issue_flags)
a2cdd519 693{
9ca9fb24
PB
694 int ret;
695
696 ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
697 if (ret != -ENOENT)
698 return ret;
699
700 io_ring_submit_lock(ctx, issue_flags);
701 ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked);
702 io_ring_submit_unlock(ctx, issue_flags);
703 return ret;
a2cdd519
PB
704}
705
329061d3
JA
706static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
707 unsigned int flags)
708{
709 u32 events;
710
711 events = READ_ONCE(sqe->poll32_events);
712#ifdef __BIG_ENDIAN
713 events = swahw32(events);
714#endif
715 if (!(flags & IORING_POLL_ADD_MULTI))
716 events |= EPOLLONESHOT;
b9ba8a44
JA
717 if (!(flags & IORING_POLL_ADD_LEVEL))
718 events |= EPOLLET;
719 return demangle_poll(events) |
720 (events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
329061d3
JA
721}
722
723int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
724{
725 struct io_poll_update *upd = io_kiocb_to_cmd(req);
726 u32 flags;
727
728 if (sqe->buf_index || sqe->splice_fd_in)
729 return -EINVAL;
730 flags = READ_ONCE(sqe->len);
731 if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
732 IORING_POLL_ADD_MULTI))
733 return -EINVAL;
734 /* meaningless without update */
735 if (flags == IORING_POLL_ADD_MULTI)
736 return -EINVAL;
737
738 upd->old_user_data = READ_ONCE(sqe->addr);
739 upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
740 upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
741
742 upd->new_user_data = READ_ONCE(sqe->off);
743 if (!upd->update_user_data && upd->new_user_data)
744 return -EINVAL;
745 if (upd->update_events)
746 upd->events = io_poll_parse_events(sqe, flags);
747 else if (sqe->poll32_events)
748 return -EINVAL;
749
750 return 0;
751}
752
753int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
754{
755 struct io_poll *poll = io_kiocb_to_cmd(req);
756 u32 flags;
757
758 if (sqe->buf_index || sqe->off || sqe->addr)
759 return -EINVAL;
760 flags = READ_ONCE(sqe->len);
b9ba8a44 761 if (flags & ~(IORING_POLL_ADD_MULTI|IORING_POLL_ADD_LEVEL))
329061d3
JA
762 return -EINVAL;
763 if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
764 return -EINVAL;
765
329061d3
JA
766 poll->events = io_poll_parse_events(sqe, flags);
767 return 0;
768}
769
770int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
771{
772 struct io_poll *poll = io_kiocb_to_cmd(req);
773 struct io_poll_table ipt;
774 int ret;
775
776 ipt.pt._qproc = io_poll_queue_proc;
777
9ca9fb24
PB
778 /*
779 * If sqpoll or single issuer, there is no contention for ->uring_lock
780 * and we'll end up holding it in tw handlers anyway.
781 */
782 if (!(issue_flags & IO_URING_F_UNLOCKED) &&
783 (req->ctx->flags & (IORING_SETUP_SQPOLL | IORING_SETUP_SINGLE_ISSUER)))
784 req->flags |= REQ_F_HASH_LOCKED;
785 else
786 req->flags &= ~REQ_F_HASH_LOCKED;
787
329061d3
JA
788 ret = __io_arm_poll_handler(req, poll, &ipt, poll->events);
789 if (ret) {
790 io_req_set_res(req, ret, 0);
791 return IOU_OK;
792 }
793 if (ipt.error) {
794 req_set_fail(req);
795 return ipt.error;
796 }
797
798 return IOU_ISSUE_SKIP_COMPLETE;
799}
800
801int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
802{
803 struct io_poll_update *poll_update = io_kiocb_to_cmd(req);
804 struct io_cancel_data cd = { .data = poll_update->old_user_data, };
805 struct io_ring_ctx *ctx = req->ctx;
1ab1edb0 806 struct io_hash_bucket *bucket;
329061d3
JA
807 struct io_kiocb *preq;
808 int ret2, ret = 0;
809 bool locked;
810
e6f89be6 811 preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
9ca9fb24 812 ret2 = io_poll_disarm(preq);
1ab1edb0
PB
813 if (bucket)
814 spin_unlock(&bucket->lock);
9ca9fb24
PB
815 if (!ret2)
816 goto found;
817 if (ret2 != -ENOENT) {
818 ret = ret2;
38513c46
HX
819 goto out;
820 }
9ca9fb24
PB
821
822 io_ring_submit_lock(ctx, issue_flags);
823 preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
824 ret2 = io_poll_disarm(preq);
825 if (bucket)
826 spin_unlock(&bucket->lock);
827 io_ring_submit_unlock(ctx, issue_flags);
828 if (ret2) {
829 ret = ret2;
329061d3
JA
830 goto out;
831 }
329061d3 832
9ca9fb24 833found:
329061d3
JA
834 if (poll_update->update_events || poll_update->update_user_data) {
835 /* only mask one event flags, keep behavior flags */
836 if (poll_update->update_events) {
837 struct io_poll *poll = io_kiocb_to_cmd(preq);
838
839 poll->events &= ~0xffff;
840 poll->events |= poll_update->events & 0xffff;
841 poll->events |= IO_POLL_UNMASK;
842 }
843 if (poll_update->update_user_data)
844 preq->cqe.user_data = poll_update->new_user_data;
845
846 ret2 = io_poll_add(preq, issue_flags);
847 /* successfully updated, don't complete poll request */
848 if (!ret2 || ret2 == -EIOCBQUEUED)
849 goto out;
850 }
851
852 req_set_fail(preq);
853 io_req_set_res(preq, -ECANCELED, 0);
854 locked = !(issue_flags & IO_URING_F_UNLOCKED);
855 io_req_task_complete(preq, &locked);
856out:
857 if (ret < 0) {
858 req_set_fail(req);
859 return ret;
860 }
861 /* complete update request, we're done with it */
862 io_req_set_res(req, ret, 0);
863 return IOU_OK;
864}