Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
[linux-block.git] / io_uring / rsrc.c
index 27ceda3b50cf4e677cde5e2417a80adad66e162f..2e0a14e87187ff401b60fb094e9e603502f3437f 100644 (file)
@@ -1161,14 +1161,17 @@ struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages)
        pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
                              pages, vmas);
        if (pret == nr_pages) {
+               struct file *file = vmas[0]->vm_file;
+
                /* don't support file backed memory */
                for (i = 0; i < nr_pages; i++) {
-                       struct vm_area_struct *vma = vmas[i];
-
-                       if (vma_is_shmem(vma))
+                       if (vmas[i]->vm_file != file) {
+                               ret = -EINVAL;
+                               break;
+                       }
+                       if (!file)
                                continue;
-                       if (vma->vm_file &&
-                           !is_file_hugepages(vma->vm_file)) {
+                       if (!vma_is_shmem(vmas[i]) && !is_file_hugepages(file)) {
                                ret = -EOPNOTSUPP;
                                break;
                        }
@@ -1206,6 +1209,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
        unsigned long off;
        size_t size;
        int ret, nr_pages, i;
+       struct folio *folio = NULL;
 
        *pimu = ctx->dummy_ubuf;
        if (!iov->iov_base)
@@ -1220,6 +1224,21 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
                goto done;
        }
 
+       /* If it's a huge page, try to coalesce them into a single bvec entry */
+       if (nr_pages > 1) {
+               folio = page_folio(pages[0]);
+               for (i = 1; i < nr_pages; i++) {
+                       if (page_folio(pages[i]) != folio) {
+                               folio = NULL;
+                               break;
+                       }
+               }
+               if (folio) {
+                       folio_put_refs(folio, nr_pages - 1);
+                       nr_pages = 1;
+               }
+       }
+
        imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
        if (!imu)
                goto done;
@@ -1232,6 +1251,17 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 
        off = (unsigned long) iov->iov_base & ~PAGE_MASK;
        size = iov->iov_len;
+       /* store original address for later verification */
+       imu->ubuf = (unsigned long) iov->iov_base;
+       imu->ubuf_end = imu->ubuf + iov->iov_len;
+       imu->nr_bvecs = nr_pages;
+       *pimu = imu;
+       ret = 0;
+
+       if (folio) {
+               bvec_set_page(&imu->bvec[0], pages[0], size, off);
+               goto done;
+       }
        for (i = 0; i < nr_pages; i++) {
                size_t vec_len;
 
@@ -1240,12 +1270,6 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
                off = 0;
                size -= vec_len;
        }
-       /* store original address for later verification */
-       imu->ubuf = (unsigned long) iov->iov_base;
-       imu->ubuf_end = imu->ubuf + iov->iov_len;
-       imu->nr_bvecs = nr_pages;
-       *pimu = imu;
-       ret = 0;
 done:
        if (ret)
                kvfree(imu);
@@ -1334,7 +1358,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
                return -EFAULT;
 
        /*
-        * May not be a start of buffer, set size appropriately
+        * Might not be a start of buffer, set size appropriately
         * and advance us to the beginning.
         */
        offset = buf_addr - imu->ubuf;
@@ -1360,7 +1384,15 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
                const struct bio_vec *bvec = imu->bvec;
 
                if (offset <= bvec->bv_len) {
-                       iov_iter_advance(iter, offset);
+                       /*
+                        * Note, huge page buffers consist of one large
+                        * bvec entry and should always go this way. The other
+                        * branch doesn't expect non PAGE_SIZE'd chunks.
+                        */
+                       iter->bvec = bvec;
+                       iter->nr_segs = bvec->bv_len;
+                       iter->count -= offset;
+                       iter->iov_offset = offset;
                } else {
                        unsigned long seg_skip;