io_uring/kbuf: vmap pinned buffer ring
author Jens Axboe <axboe@kernel.dk>
Tue, 12 Mar 2024 16:42:27 +0000 (10:42 -0600)
committer Jens Axboe <axboe@kernel.dk>
Mon, 15 Apr 2024 14:10:26 +0000 (08:10 -0600)
This avoids needing to care about HIGHMEM, and it makes the buffer
indexing easier as both ring-provided buffer methods are now virtually
mapped in a contiguous fashion.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
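
The core of the change is the pin-then-vmap pattern: pin the userspace
pages as before, but then vmap() them into one virtually contiguous
kernel range instead of going through page_address() per page. Below is
a minimal, self-contained sketch of that pattern; it uses the generic
pin_user_pages_fast() rather than io_uring's internal io_pin_pages()
helper, and pin_and_vmap() is a hypothetical name, not kernel API:

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Hypothetical helper, illustrating the pattern this commit adopts. */
static void *pin_and_vmap(unsigned long uaddr, size_t len,
			  struct page ***pages_out, int *nr_out)
{
	unsigned long off = uaddr & ~PAGE_MASK;
	int nr_pages = DIV_ROUND_UP(len + off, PAGE_SIZE);
	struct page **pages;
	void *vaddr;
	int pinned, ret;

	pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	/* Long-term pin, as the ring outlives this call. */
	pinned = pin_user_pages_fast(uaddr & PAGE_MASK, nr_pages,
				     FOLL_WRITE | FOLL_LONGTERM, pages);
	if (pinned != nr_pages) {
		ret = pinned < 0 ? pinned : -EFAULT;
		goto err;
	}

	/* One virtually contiguous kernel mapping, highmem or not. */
	vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!vaddr) {
		ret = -ENOMEM;
		goto err;
	}

	*pages_out = pages;
	*nr_out = nr_pages;
	return vaddr + off;
err:
	if (pinned > 0)
		unpin_user_pages(pages, pinned);
	kvfree(pages);
	return ERR_PTR(ret);
}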
io_uring/kbuf.c

index 3aa16e27f5099a426abe1f991c991ffdd9ab379f..4289f4a926934635ad024630ff58481bc9de3c52 100644
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/namei.h>
 #include <linux/poll.h>
+#include <linux/vmalloc.h>
 #include <linux/io_uring.h>
 
 #include <uapi/linux/io_uring.h>
@@ -146,15 +147,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
                req->flags |= REQ_F_BL_EMPTY;
 
        head &= bl->mask;
-       /* mmaped buffers are always contig */
-       if (bl->is_mmap || head < IO_BUFFER_LIST_BUF_PER_PAGE) {
-               buf = &br->bufs[head];
-       } else {
-               int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
-               int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
-               buf = page_address(bl->buf_pages[index]);
-               buf += off;
-       }
+       buf = &br->bufs[head];
        if (*len == 0 || *len > buf->len)
                *len = buf->len;
        req->flags |= REQ_F_BUFFER_RING;
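
For reference, the slow path removed above existed because the pinned
pages were only guaranteed to be distinct pages, not virtually
contiguous, so a ring head had to be split into a page index and an
in-page slot. Schematically, the old lookup was (illustrative; this
assumes IO_BUFFER_LIST_BUF_PER_PAGE is PAGE_SIZE / sizeof(struct
io_uring_buf), a power of two):

	int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;	/* which pinned page */
	int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);	/* slot within it */
	struct io_uring_buf *buf = (struct io_uring_buf *)
		page_address(bl->buf_pages[index]) + off;

With the ring vmap()'ed, &br->bufs[head] indexes the same memory
directly.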
@@ -241,6 +234,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
                        for (j = 0; j < bl->buf_nr_pages; j++)
                                unpin_user_page(bl->buf_pages[j]);
                        kvfree(bl->buf_pages);
+                       vunmap(bl->buf_ring);
                        bl->buf_pages = NULL;
                        bl->buf_nr_pages = 0;
                }
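
Teardown mirrors setup. A sketch of the unwind for a mapping created
with the hypothetical pin_and_vmap() above; note that vunmap() expects
the page-aligned address vmap() returned, and is a no-op when passed
NULL, which is what keeps the shared error path in io_pin_pbuf_ring()
below safe:

/* Hypothetical counterpart to pin_and_vmap() above. */
static void vunmap_and_unpin(void *vaddr, struct page **pages, int nr_pages)
{
	/* Drop the kernel mapping, then release the page pins. */
	vunmap((void *)((unsigned long)vaddr & PAGE_MASK));
	unpin_user_pages(pages, nr_pages);
	kvfree(pages);
}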
@@ -498,9 +492,9 @@ err:
 static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
                            struct io_buffer_list *bl)
 {
-       struct io_uring_buf_ring *br;
+       struct io_uring_buf_ring *br = NULL;
+       int nr_pages, ret, i;
        struct page **pages;
-       int i, nr_pages;
 
        pages = io_pin_pages(reg->ring_addr,
                             flex_array_size(br, bufs, reg->ring_entries),
@@ -508,18 +502,12 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
        if (IS_ERR(pages))
                return PTR_ERR(pages);
 
-       /*
-        * Apparently some 32-bit boxes (ARM) will return highmem pages,
-        * which then need to be mapped. We could support that, but it'd
-        * complicate the code and slowdown the common cases quite a bit.
-        * So just error out, returning -EINVAL just like we did on kernels
-        * that didn't support mapped buffer rings.
-        */
-       for (i = 0; i < nr_pages; i++)
-               if (PageHighMem(pages[i]))
-                       goto error_unpin;
+       br = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+       if (!br) {
+               ret = -ENOMEM;
+               goto error_unpin;
+       }
 
-       br = page_address(pages[0]);
 #ifdef SHM_COLOUR
        /*
         * On platforms that have specific aliasing requirements, SHM_COLOUR
@@ -530,8 +518,10 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
         * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
         * this transparently.
         */
-       if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1))
+       if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) {
+               ret = -EINVAL;
                goto error_unpin;
+       }
 #endif
        bl->buf_pages = pages;
        bl->buf_nr_pages = nr_pages;
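
On architectures with virtually indexed caches (parisc defines
SHM_COLOUR, for example), the user mapping and the new kernel vmap alias
the same pages, so both must fall at the same offset within a cache
color; the check above enforces that by requiring both addresses to be
color aligned. As a hedged illustration (ring_color_ok() is a
hypothetical name, and 0x00400000 is the 4MB color assumed from parisc):

static inline bool ring_color_ok(unsigned long uaddr, unsigned long kaddr)
{
	/* Both mappings must sit on a color boundary so the VIPT cache
	 * resolves user and kernel accesses to the same alias set. */
	return !((uaddr | kaddr) & (SHM_COLOUR - 1));
}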
@@ -543,7 +533,8 @@ error_unpin:
        for (i = 0; i < nr_pages; i++)
                unpin_user_page(pages[i]);
        kvfree(pages);
-       return -EINVAL;
+       vunmap(br);
+       return ret;
 }
 
 /*
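
Nothing changes for applications: a ring registered with
IORING_REGISTER_PBUF_RING is now pinned and vmap()'ed kernel-side, but
the ABI is identical. A minimal userspace setup sketch, assuming
liburing's io_uring_register_buf_ring() and io_uring_buf_ring_init()
helpers; flags of 0 selects the application-allocated ring that
io_pin_pbuf_ring() above handles:

#include <liburing.h>
#include <stdlib.h>

static struct io_uring_buf_ring *setup_pbuf_ring(struct io_uring *ring,
						 unsigned int entries, int bgid)
{
	struct io_uring_buf_reg reg = { };
	struct io_uring_buf_ring *br;

	/* entries must be a power of two; the ring itself is just
	 * entries * sizeof(struct io_uring_buf), page aligned. */
	if (posix_memalign((void **) &br, 4096,
			   entries * sizeof(struct io_uring_buf)))
		return NULL;

	reg.ring_addr = (unsigned long) br;
	reg.ring_entries = entries;
	reg.bgid = bgid;
	if (io_uring_register_buf_ring(ring, &reg, 0) < 0) {
		free(br);
		return NULL;
	}
	io_uring_buf_ring_init(br);
	return br;
}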