struct percpu_ref refs;
struct io_rings *rings;
+
+ /*
+ * If IORING_SETUP_NO_MMAP is used, then the below holds
+ * the gup'ed pages for the two rings, and the sqes.
+ */
+ struct page **ring_pages;
+ struct page **sqe_pages;
+ int n_ring_pages;
+ int n_sqe_pages;
+
unsigned int flags;
enum task_work_notify_mode notify_method;
unsigned int compat: 1;
free_compound_page(page);
}
+static void io_pages_free(struct page ***pages, int npages)
+{
+ struct page **page_array;
+ int i;
+
+ if (!pages)
+ return;
+ page_array = *pages;
+ for (i = 0; i < npages; i++)
+ unpin_user_page(page_array[i]);
+ kvfree(page_array);
+ *pages = NULL;
+}
+
+static void *__io_uaddr_map(struct page ***pages, int *npages,
+ unsigned long uaddr, size_t size)
+{
+ struct page **page_array;
+ int ret;
+
+ if (uaddr & (PAGE_SIZE - 1) || !size)
+ return ERR_PTR(-EINVAL);
+
+ *npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ page_array = kvmalloc_array(*npages, sizeof(struct page *), GFP_KERNEL);
+ if (!page_array) {
+ *npages = 0;
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = pin_user_pages_fast(uaddr, *npages, FOLL_WRITE | FOLL_LONGTERM,
+ page_array);
+ if (ret != *npages) {
+err:
+ if (ret > 0)
+ io_pages_free(&page_array, ret);
+ *npages = 0;
+ return ret < 0 ? ERR_PTR(ret) : ERR_PTR(-EFAULT);
+ }
+ /* pages must be contig */
+ ret--;
+ if (page_array[0] + ret != page_array[ret])
+ goto err;
+ *pages = page_array;
+ return page_to_virt(page_array[0]);
+}
+
+static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
+ size_t size)
+{
+ return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr,
+ size);
+}
+
+static void *io_sqes_map(struct io_ring_ctx *ctx, unsigned long uaddr,
+ size_t size)
+{
+ return __io_uaddr_map(&ctx->sqe_pages, &ctx->n_sqe_pages, uaddr,
+ size);
+}
+
static void io_rings_free(struct io_ring_ctx *ctx)
{
- io_mem_free(ctx->rings);
- io_mem_free(ctx->sq_sqes);
- ctx->rings = NULL;
- ctx->sq_sqes = NULL;
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
+ io_mem_free(ctx->rings);
+ io_mem_free(ctx->sq_sqes);
+ ctx->rings = NULL;
+ ctx->sq_sqes = NULL;
+ } else {
+ io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
+ io_pages_free(&ctx->sqe_pages, ctx->n_sqe_pages);
+ }
}
static void *io_mem_alloc(size_t size)
static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
+ struct io_ring_ctx *ctx = file->private_data;
size_t sz = vma->vm_end - vma->vm_start;
unsigned long pfn;
void *ptr;
+ /* Don't allow mmap if the ring was setup without it */
+ if (ctx->flags & IORING_SETUP_NO_MMAP)
+ return -EINVAL;
+
ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
if (IS_ERR(ptr))
return PTR_ERR(ptr);
static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
+ struct io_ring_ctx *ctx = file->private_data;
+
+ /* Don't allow mmap if the ring was setup without it */
+ if (ctx->flags & IORING_SETUP_NO_MMAP)
+ return -EINVAL;
+
return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL;
}
if (size == SIZE_MAX)
return -EOVERFLOW;
- rings = io_mem_alloc(size);
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP))
+ rings = io_mem_alloc(size);
+ else
+ rings = io_rings_map(ctx, p->cq_off.user_addr, size);
+
if (IS_ERR(rings))
return PTR_ERR(rings);
return -EOVERFLOW;
}
- ptr = io_mem_alloc(size);
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP))
+ ptr = io_mem_alloc(size);
+ else
+ ptr = io_sqes_map(ctx, p->sq_off.user_addr, size);
+
if (IS_ERR(ptr)) {
io_rings_free(ctx);
return PTR_ERR(ptr);
}
- ctx->sq_sqes = io_mem_alloc(size);
+ ctx->sq_sqes = ptr;
return 0;
}
p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
p->sq_off.resv1 = 0;
- p->sq_off.resv2 = 0;
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP))
+ p->sq_off.user_addr = 0;
p->cq_off.head = offsetof(struct io_rings, cq.head);
p->cq_off.tail = offsetof(struct io_rings, cq.tail);
p->cq_off.cqes = offsetof(struct io_rings, cqes);
p->cq_off.flags = offsetof(struct io_rings, cq_flags);
p->cq_off.resv1 = 0;
- p->cq_off.resv2 = 0;
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP))
+ p->cq_off.user_addr = 0;
p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
- IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG))
+ IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
+ IORING_SETUP_NO_MMAP))
return -EINVAL;
return io_uring_create(entries, &p, params);