#define IORING_MAX_REG_BUFFERS (1U << 14)
#define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
-			  IOSQE_IO_HARDLINK | IOSQE_ASYNC)
+			  IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
+			  IOSQE_CQE_SKIP_SUCCESS)
#define SQE_VALID_FLAGS (SQE_COMMON_FLAGS|IOSQE_BUFFER_SELECT|IOSQE_IO_DRAIN)
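For context, the submission path checks these masks in two tiers; the sketch below paraphrases io_init_req() from the same kernel generation (not part of this hunk). It shows why the new flag goes into the common mask: skip-capable SQEs stay on the fast path, while anything outside SQE_VALID_FLAGS is still rejected for forward compatibility.

	if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) {
		/* enforce forwards compatibility on users */
		if (sqe_flags & ~SQE_VALID_FLAGS)
			return -EINVAL;
		/* slow path: per-flag handling for BUFFER_SELECT, IO_DRAIN */
	}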
	REQ_F_HARDLINK_BIT	= IOSQE_IO_HARDLINK_BIT,
	REQ_F_FORCE_ASYNC_BIT	= IOSQE_ASYNC_BIT,
	REQ_F_BUFFER_SELECT_BIT	= IOSQE_BUFFER_SELECT_BIT,
+	REQ_F_CQE_SKIP_BIT	= IOSQE_CQE_SKIP_SUCCESS_BIT,
	/* first byte is taken by user flags, shift it to not overlap */
	REQ_F_FAIL_BIT		= 8,
	REQ_F_REFCOUNT_BIT,
	REQ_F_ARM_LTIMEOUT_BIT,
	REQ_F_ASYNC_DATA_BIT,
+	REQ_F_SKIP_LINK_CQES_BIT,
	/* keep async read/write and isreg together and in order */
	REQ_F_SUPPORT_NOWAIT_BIT,
	REQ_F_ISREG_BIT,
	REQ_F_FORCE_ASYNC	= BIT(REQ_F_FORCE_ASYNC_BIT),
	/* IOSQE_BUFFER_SELECT */
	REQ_F_BUFFER_SELECT	= BIT(REQ_F_BUFFER_SELECT_BIT),
+	/* IOSQE_CQE_SKIP_SUCCESS */
+	REQ_F_CQE_SKIP		= BIT(REQ_F_CQE_SKIP_BIT),
	/* fail rest of links */
	REQ_F_FAIL		= BIT(REQ_F_FAIL_BIT),
	REQ_F_ARM_LTIMEOUT	= BIT(REQ_F_ARM_LTIMEOUT_BIT),
	/* ->async_data allocated */
	REQ_F_ASYNC_DATA	= BIT(REQ_F_ASYNC_DATA_BIT),
+	/* don't post CQEs while failing linked requests */
+	REQ_F_SKIP_LINK_CQES	= BIT(REQ_F_SKIP_LINK_CQES_BIT),
};
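Since REQ_F_CQE_SKIP_BIT aliases IOSQE_CQE_SKIP_SUCCESS_BIT, the internal flag is bit-identical to the user-visible one. A build-time check along these lines (a sketch, not in the patch) would pin the invariant the "first byte is taken by user flags" comment relies on:

	/* sketch, not in the patch: the first byte of req->flags is shared
	 * with the user-visible IOSQE_* flags, so the values must coincide */
	static_assert(REQ_F_CQE_SKIP == IOSQE_CQE_SKIP_SUCCESS,
		      "REQ_F_CQE_SKIP must mirror IOSQE_CQE_SKIP_SUCCESS");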
struct async_poll {
static inline void req_set_fail(struct io_kiocb *req)
{
	req->flags |= REQ_F_FAIL;
+	/* skip-on-success must not swallow the error CQE: drop the skip for
+	 * this request and push it onto the links that will be cancelled */
+	if (req->flags & REQ_F_CQE_SKIP) {
+		req->flags &= ~REQ_F_CQE_SKIP;
+		req->flags |= REQ_F_SKIP_LINK_CQES;
+	}
}
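The flag dance above is easy to misread, so here is a standalone sketch (stand-in constants, not kernel code) that can be compiled and run to confirm the before/after state:

	/* standalone sketch of req_set_fail()'s flag transition */
	#include <assert.h>

	enum { CQE_SKIP = 1 << 0, FAIL = 1 << 1, SKIP_LINK_CQES = 1 << 2 };

	static unsigned set_fail(unsigned flags)
	{
		flags |= FAIL;
		if (flags & CQE_SKIP) {
			flags &= ~CQE_SKIP;	 /* the error CQE must be posted */
			flags |= SKIP_LINK_CQES; /* ...but cancelled links stay silent */
		}
		return flags;
	}

	int main(void)
	{
		assert(set_fail(CQE_SKIP) == (FAIL | SKIP_LINK_CQES));
		return 0;
	}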
static inline void req_fail_link_node(struct io_kiocb *req, int res)
static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
{
-	__io_fill_cqe(req->ctx, req->user_data, res, cflags);
+	if (!(req->flags & REQ_F_CQE_SKIP))
+		__io_fill_cqe(req->ctx, req->user_data, res, cflags);
}
static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
	struct io_ring_ctx *ctx = req->ctx;
	spin_lock(&ctx->completion_lock);
-	__io_fill_cqe(ctx, req->user_data, res, cflags);
+	if (!(req->flags & REQ_F_CQE_SKIP))
+		__io_fill_cqe(ctx, req->user_data, res, cflags);
	/*
	 * If we're the last reference to this request, add to our locked
	 * free_list cache.
		link->timeout.head = NULL;
		if (hrtimer_try_to_cancel(&io->timer) != -1) {
			list_del(&link->timeout.list);
+			/* leave REQ_F_CQE_SKIP to io_fill_cqe_req */
			io_fill_cqe_req(link, -ECANCELED, 0);
			io_put_req_deferred(link);
			return true;
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *nxt, *link = req->link;
+	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;
	req->link = NULL;
	while (link) {
		long res = -ECANCELED;

		if (link->flags & REQ_F_FAIL)
			res = link->result;

		nxt = link->link;
		link->link = NULL;

		trace_io_uring_fail_link(req, link);
+		/* cancelled links report their CQE unless the failed head
+		 * carried REQ_F_SKIP_LINK_CQES */
+		if (!ignore_cqes) {
+			link->flags &= ~REQ_F_CQE_SKIP;
+			io_fill_cqe_req(link, res, 0);
+		}
		io_put_req_deferred(link);
		link = nxt;
	}
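Putting req_set_fail() and io_fail_links() together gives the user-visible rule; a hypothetical trace (illustration, not kernel code) for a head request A submitted with IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_LINK that fails with -EIO, followed by a linked request B:

	/*
	 * req_set_fail(A)    -> A: REQ_F_FAIL | REQ_F_SKIP_LINK_CQES
	 * io_fill_cqe_req(A) -> CQE { res = -EIO }  (errors are never skipped)
	 * io_fail_links(A)   -> ignore_cqes == true, so B is cancelled
	 *                       without posting a -ECANCELED CQE
	 */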
		req->flags &= ~REQ_F_ARM_LTIMEOUT;
		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
			io_remove_next_linked(req);
+			/* leave REQ_F_CQE_SKIP to io_fill_cqe_req */
			io_fill_cqe_req(link, -ECANCELED, 0);
			io_put_req_deferred(link);
			posted = true;
		struct io_kiocb *req = container_of(node, struct io_kiocb,
						    comp_list);
-		__io_fill_cqe(ctx, req->user_data, req->result,
-			      req->cflags);
+		if (!(req->flags & REQ_F_CQE_SKIP))
+			__io_fill_cqe(ctx, req->user_data, req->result,
+				      req->cflags);
	}
	io_commit_cqring(ctx);
	spin_unlock(&ctx->completion_lock);
	prev = start;
	wq_list_for_each_resume(pos, prev) {
		struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
+		u32 cflags;
		/* order with io_complete_rw_iopoll(), e.g. ->result updates */
		if (!smp_load_acquire(&req->iopoll_completed))
			break;
-		__io_fill_cqe(ctx, req->user_data, req->result,
-			      io_put_rw_kbuf(req));
+		/* the kbuf must be released even when the CQE is skipped */
+		cflags = io_put_rw_kbuf(req);
+		if (!(req->flags & REQ_F_CQE_SKIP))
+			__io_fill_cqe(ctx, req->user_data, req->result, cflags);
		nr_events++;
	}
	flags = READ_ONCE(sqe->len);
	if (flags & ~IORING_POLL_ADD_MULTI)
		return -EINVAL;
+	/* multishot exists to post many CQEs; skipping them makes no sense */
+	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
+		return -EINVAL;
	io_req_set_refcount(req);
	poll->events = io_poll_parse_events(sqe, flags);
			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
			IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
			IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
-			IORING_FEAT_RSRC_TAGS;
+			IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP;
	if (copy_to_user(params, p, sizeof(*p))) {
		ret = -EFAULT;
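Finally, a minimal userspace sketch (assuming liburing headers that define IOSQE_CQE_SKIP_SUCCESS; always test IORING_FEAT_CQE_SKIP first, since older kernels reject unknown SQE flags with -EINVAL): a linked nop pair where, on success, only the second request posts a CQE.

	/* minimal userspace sketch: on success, exactly one CQE arrives */
	#include <liburing.h>
	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_params p = { 0 };
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;

		if (io_uring_queue_init_params(8, &ring, &p) < 0)
			return 1;
		if (!(p.features & IORING_FEAT_CQE_SKIP)) {
			fprintf(stderr, "kernel lacks IORING_FEAT_CQE_SKIP\n");
			io_uring_queue_exit(&ring);
			return 1;
		}

		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_nop(sqe);
		io_uring_sqe_set_data(sqe, (void *)(uintptr_t)1);
		/* silent on success; a failure here would still post a CQE and
		 * would cancel the link below without a -ECANCELED CQE for it */
		sqe->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_LINK;

		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_nop(sqe);
		io_uring_sqe_set_data(sqe, (void *)(uintptr_t)2);

		io_uring_submit(&ring);
		if (io_uring_wait_cqe(&ring, &cqe) == 0) {
			/* expect user_data == 2: the first nop's CQE was skipped */
			printf("user_data=%lu res=%d\n",
			       (unsigned long)(uintptr_t)io_uring_cqe_get_data(cqe),
			       cqe->res);
			io_uring_cqe_seen(&ring, cqe);
		}
		io_uring_queue_exit(&ring);
		return 0;
	}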