arg->iovs[0].iov_base = buf;
arg->iovs[0].iov_len = *len;
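+ /* legacy provided buffers always map exactly one segment */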
+ arg->nsegs = 1;
return 1;
}
/* cap it at a reasonable 256, will be one page even for 4K */
#define PEEK_MAX_IMPORT 256
+/*
+ * Returns how many iovecs were used to fill the range. arg->nsegs contains
+ * the number of buffers mapped, which may exceed the return value when
+ * contiguous buffers get coalesced into fewer iovecs.
+ */
static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
struct io_buffer_list *bl)
{
struct io_uring_buf_ring *br = bl->buf_ring;
- struct iovec *iov = arg->iovs;
+ struct iovec *prev_iov, *iov = arg->iovs;
int nr_iovs = arg->nr_iovs;
__u16 nr_avail, tail, head;
struct io_uring_buf *buf;
if (!arg->max_len)
arg->max_len = INT_MAX;
+ prev_iov = NULL;
req->buf_index = buf->bid;
do {
u32 len = buf->len;
+ void __user *ubuf;
/* truncate end piece, if needed, for non partial buffers */
if (len > arg->max_len) {
len = arg->max_len;
if (!(bl->flags & IOBL_INC))
buf->len = len;
}
- iov->iov_base = u64_to_user_ptr(buf->addr);
- iov->iov_len = len;
- iov++;
+ ubuf = u64_to_user_ptr(buf->addr);
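+ /*
+ * If this buffer is virtually contiguous with the previous one and the
+ * combined length stays within MAX_RW_COUNT, extend the previous iovec
+ * rather than starting a new one. For example, four adjacent 4K buffers
+ * collapse into a single 16K iovec, while nsegs still counts four.
+ */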
+ if (prev_iov &&
+ prev_iov->iov_base + prev_iov->iov_len == ubuf &&
+ prev_iov->iov_len + len <= MAX_RW_COUNT) {
+ prev_iov->iov_len += len;
+ } else {
+ iov->iov_base = ubuf;
+ iov->iov_len = len;
+ if (arg->coalesce)
+ prev_iov = iov;
+ iov++;
+ }
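+ /* count every buffer consumed, even when it was folded into the previous iovec */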
+ arg->nsegs++;
arg->out_len += len;
arg->max_len -= len;
if (!arg->max_len)
req->flags |= REQ_F_BL_EMPTY;
req->flags |= REQ_F_BUFFER_RING;
- req->buf_list = bl;
+ if (arg->coalesce)
+ req->buf_list = bl;
return iov - arg->iovs;
}
return io_provided_buffers_select(req, &arg->max_len, bl, arg);
}
+int io_buffer_segments(struct io_kiocb *req, int nbytes)
+{
+ struct io_uring_buf_ring *br;
+ struct io_buffer_list *bl;
+ int nbufs = 0;
+ unsigned bid;
+
+ /*
+ * Safe to use ->buf_list here, as coalescing can only have happened
+ * if we remained locked throughout the operation. Unlocked usage must
+ * not have buf_sel_arg->coalesce set to true.
+ */
+ bl = req->buf_list;
+ if (unlikely(!bl || !(bl->flags & IOBL_BUF_RING)))
+ return 1;
+
+ bid = req->buf_index;
+ br = bl->buf_ring;
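+ /*
+ * Walk the ring from the first buffer ID this request consumed, counting
+ * buffers until the transferred byte count is fully accounted for.
+ */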
+ do {
+ struct io_uring_buf *buf;
+ int this_len;
+
+ buf = io_ring_head_to_buf(br, bid, bl->mask);
+ this_len = min_t(int, buf->len, nbytes);
+ nbufs++;
+ bid++;
+ nbytes -= this_len;
+ } while (nbytes);
+
+ return nbufs;
+}
+
static int __io_remove_buffers(struct io_ring_ctx *ctx,
struct io_buffer_list *bl, unsigned nbufs)
{
size_t max_len;
unsigned short nr_iovs;
unsigned short mode;
+ unsigned short nsegs;	/* buffers mapped, may exceed the returned iovec count if coalesced */
+ bool coalesce;	/* merge contiguous buffers into one iovec; ring must stay locked */
};
+int io_buffer_segments(struct io_kiocb *req, int nbytes);
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
unsigned int issue_flags);
int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
static int io_bundle_nbufs(struct io_kiocb *req, int ret)
{
struct io_async_msghdr *kmsg = req->async_data;
- struct iovec *iov;
- int nbufs;
- /* no data is always zero segments, and a ubuf is always 1 segment */
+ /* no data is always zero segments */
if (ret <= 0)
return 0;
- if (iter_is_ubuf(&kmsg->msg.msg_iter))
- return 1;
-
- iov = kmsg->free_iov;
- if (!iov)
- iov = &kmsg->fast_iov;
-
- /* if all data was transferred, it's basic pointer math */
+ /* if all data was transferred, we already know the number of buffers */
if (!iov_iter_count(&kmsg->msg.msg_iter))
- return iter_iov(&kmsg->msg.msg_iter) - iov;
-
- /* short transfer, count segments */
- nbufs = 0;
- do {
- int this_len = min_t(int, iov[nbufs].iov_len, ret);
-
- nbufs++;
- ret -= this_len;
- } while (ret);
- return nbufs;
+ return kmsg->nbufs;
+ /* short transfer, iterate buffers to find number of segments */
+ return io_buffer_segments(req, ret);
}
static inline bool io_send_finish(struct io_kiocb *req, int *ret,
.iovs = &kmsg->fast_iov,
.max_len = min_not_zero(sr->len, INT_MAX),
.nr_iovs = 1,
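+ /*
+ * Only coalesce while the ring lock is held, so ->buf_list stays
+ * valid if io_buffer_segments() has to walk the buffers later.
+ */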
+ .coalesce = !(issue_flags & IO_URING_F_UNLOCKED),
};
if (kmsg->free_iov) {
req->flags |= REQ_F_NEED_CLEANUP;
}
sr->len = arg.out_len;
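+ /* remember how many buffers were mapped for io_bundle_nbufs() */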
+ kmsg->nbufs = arg.nsegs;
if (ret == 1) {
sr->buf = arg.iovs[0].iov_base;
.iovs = &kmsg->fast_iov,
.nr_iovs = 1,
.mode = KBUF_MODE_EXPAND,
+ .coalesce = true,
};
if (kmsg->free_iov) {
if (unlikely(ret < 0))
return ret;
- /* special case 1 vec, can be a fast path */
+ if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
+ kmsg->free_iov_nr = arg.nsegs;
+ kmsg->free_iov = arg.iovs;
+ req->flags |= REQ_F_NEED_CLEANUP;
+ }
+ kmsg->nbufs = arg.nsegs;
+
+ /*
+ * Special case 1 vec, can be a fast path. Note that multiple
+ * contig buffers may get mapped to a single vec, but we can
+ * still use ITER_UBUF for those.
+ */
if (ret == 1) {
sr->buf = arg.iovs[0].iov_base;
sr->len = arg.iovs[0].iov_len;
}
iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
arg.out_len);
- if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
- kmsg->free_iov_nr = ret;
- kmsg->free_iov = arg.iovs;
- req->flags |= REQ_F_NEED_CLEANUP;
- }
} else {
void __user *buf;
/* points to an allocated iov, if NULL we use fast_iov instead */
struct iovec *free_iov;
int free_iov_nr;
+ int nbufs;	/* number of ring buffers mapped for this request */
int namelen;
__kernel_size_t controllen;
__kernel_size_t payloadlen;