return ret;
}
+static bool io_do_coalesce_buffer(struct page ***pages, int *nr_pages,
+ struct io_imu_folio_data *data, int nr_folios)
+{
+ struct page **page_array = *pages, **new_array = NULL;
+ int nr_pages_left = *nr_pages, i, j;
+
+ /* Store head pages only*/
+ new_array = kvmalloc_array(nr_folios, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!new_array)
+ return false;
+
+ new_array[0] = compound_head(page_array[0]);
+ /*
+ * The pages are bound to the folio, it doesn't
+ * actually unpin them but drops all but one reference,
+ * which is usually put down by io_buffer_unmap().
+ * Note, needs a better helper.
+ */
+ if (data->nr_pages_head > 1)
+ unpin_user_pages(&page_array[1], data->nr_pages_head - 1);
+
+ j = data->nr_pages_head;
+ nr_pages_left -= data->nr_pages_head;
+ for (i = 1; i < nr_folios; i++) {
+ unsigned int nr_unpin;
+
+ new_array[i] = page_array[j];
+ nr_unpin = min_t(unsigned int, nr_pages_left - 1,
+ data->nr_pages_mid - 1);
+ if (nr_unpin)
+ unpin_user_pages(&page_array[j+1], nr_unpin);
+ j += data->nr_pages_mid;
+ nr_pages_left -= data->nr_pages_mid;
+ }
+ kvfree(page_array);
+ *pages = new_array;
+ *nr_pages = nr_folios;
+ return true;
+}
+
+static bool io_try_coalesce_buffer(struct page ***pages, int *nr_pages,
+ struct io_imu_folio_data *data)
+{
+ struct page **page_array = *pages;
+ struct folio *folio = page_folio(page_array[0]);
+ unsigned int count = 1, nr_folios = 1;
+ int i;
+
+ if (*nr_pages <= 1)
+ return false;
+
+ data->nr_pages_mid = folio_nr_pages(folio);
+ if (data->nr_pages_mid == 1)
+ return false;
+
+ data->folio_shift = folio_shift(folio);
+ /*
+ * Check if pages are contiguous inside a folio, and all folios have
+ * the same page count except for the head and tail.
+ */
+ for (i = 1; i < *nr_pages; i++) {
+ if (page_folio(page_array[i]) == folio &&
+ page_array[i] == page_array[i-1] + 1) {
+ count++;
+ continue;
+ }
+
+ if (nr_folios == 1) {
+ if (folio_page_idx(folio, page_array[i-1]) !=
+ data->nr_pages_mid - 1)
+ return false;
+
+ data->nr_pages_head = count;
+ } else if (count != data->nr_pages_mid) {
+ return false;
+ }
+
+ folio = page_folio(page_array[i]);
+ if (folio_size(folio) != (1UL << data->folio_shift) ||
+ folio_page_idx(folio, page_array[i]) != 0)
+ return false;
+
+ count = 1;
+ nr_folios++;
+ }
+ if (nr_folios == 1)
+ data->nr_pages_head = count;
+
+ return io_do_coalesce_buffer(pages, nr_pages, data, nr_folios);
+}
+
static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
struct io_mapped_ubuf **pimu,
struct page **last_hpage)
unsigned long off;
size_t size;
int ret, nr_pages, i;
- struct folio *folio = NULL;
+ struct io_imu_folio_data data;
+ bool coalesced;
*pimu = (struct io_mapped_ubuf *)&dummy_ubuf;
if (!iov->iov_base)
goto done;
}
- /* If it's a huge page, try to coalesce them into a single bvec entry */
- if (nr_pages > 1) {
- folio = page_folio(pages[0]);
- for (i = 1; i < nr_pages; i++) {
- /*
- * Pages must be consecutive and on the same folio for
- * this to work
- */
- if (page_folio(pages[i]) != folio ||
- pages[i] != pages[i - 1] + 1) {
- folio = NULL;
- break;
- }
- }
- if (folio) {
- /*
- * The pages are bound to the folio, it doesn't
- * actually unpin them but drops all but one reference,
- * which is usually put down by io_buffer_unmap().
- * Note, needs a better helper.
- */
- unpin_user_pages(&pages[1], nr_pages - 1);
- nr_pages = 1;
- }
- }
+ /* If it's huge page(s), try to coalesce them into fewer bvec entries */
+ coalesced = io_try_coalesce_buffer(&pages, &nr_pages, &data);
imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
if (!imu)
goto done;
}
- off = (unsigned long) iov->iov_base & ~PAGE_MASK;
size = iov->iov_len;
/* store original address for later verification */
imu->ubuf = (unsigned long) iov->iov_base;
imu->nr_bvecs = nr_pages;
imu->folio_shift = PAGE_SHIFT;
imu->folio_mask = PAGE_MASK;
+ if (coalesced) {
+ imu->folio_shift = data.folio_shift;
+ imu->folio_mask = ~((1UL << data.folio_shift) - 1);
+ }
+ off = (unsigned long) iov->iov_base & ~imu->folio_mask;
*pimu = imu;
ret = 0;
- if (folio) {
- bvec_set_page(&imu->bvec[0], pages[0], size, off);
- goto done;
- }
for (i = 0; i < nr_pages; i++) {
size_t vec_len;
- vec_len = min_t(size_t, size, PAGE_SIZE - off);
+ vec_len = min_t(size_t, size, (1UL << imu->folio_shift) - off);
bvec_set_page(&imu->bvec[i], pages[i], vec_len, off);
off = 0;
size -= vec_len;