// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>
#include <asm/shmparam.h>

#include "memmap.h"
#include "kbuf.h"

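/*
 * Try to satisfy the whole allocation with one (possibly compound) page
 * allocation. Fails with -ENOMEM if the required order is larger than
 * MAX_PAGE_ORDER, in which case the caller falls back to single pages.
 */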
static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
				   size_t size, gfp_t gfp)
{
	struct page *page;
	int i, order;

	order = get_order(size);
	if (order > MAX_PAGE_ORDER)
		return ERR_PTR(-ENOMEM);
	else if (order)
		gfp |= __GFP_COMP;

	page = alloc_pages(gfp, order);
	if (!page)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < nr_pages; i++)
		pages[i] = page + i;

	return page_address(page);
}

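/*
 * Fallback path: allocate each page individually and stitch them into a
 * contiguous kernel virtual range with vmap(). Any pages already
 * allocated are released on failure.
 */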
static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size,
				 gfp_t gfp)
{
	void *ret;
	int i;

	for (i = 0; i < nr_pages; i++) {
		pages[i] = alloc_page(gfp);
		if (!pages[i])
			goto err;
	}

	ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (ret)
		return ret;
err:
	while (i--)
		put_page(pages[i]);
	return ERR_PTR(-ENOMEM);
}

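/*
 * Allocate zeroed, accounted memory for a ring region. A physically
 * contiguous compound allocation is preferred; if that fails, fall back
 * to vmap()'ed single pages. On success the backing page array and page
 * count are returned through out_pages/npages for later mmap and unmap.
 */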
void *io_pages_map(struct page ***out_pages, unsigned short *npages,
		   size_t size)
{
	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
	struct page **pages;
	int nr_pages;
	void *ret;

	nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	ret = io_mem_alloc_compound(pages, nr_pages, size, gfp);
	if (!IS_ERR(ret))
		goto done;

	ret = io_mem_alloc_single(pages, nr_pages, size, gfp);
	if (!IS_ERR(ret)) {
done:
		*out_pages = pages;
		*npages = nr_pages;
		return ret;
	}

	kvfree(pages);
	*out_pages = NULL;
	*npages = 0;
	return ret;
}

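/*
 * Undo io_pages_map(): drop the page references if requested, undo the
 * vmap if one was needed, and free the page array itself.
 */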
void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
		    bool put_pages)
{
	bool do_vunmap = false;

	if (!ptr)
		return;

	if (put_pages && *npages) {
		struct page **to_free = *pages;
		int i;

		/*
		 * Only did vmap for the non-compound multiple page case.
		 * For the compound page, we just need to put the head.
		 */
		if (PageCompound(to_free[0]))
			*npages = 1;
		else if (*npages > 1)
			do_vunmap = true;
		for (i = 0; i < *npages; i++)
			put_page(to_free[i]);
	}
	if (do_vunmap)
		vunmap(ptr);
	kvfree(*pages);
	*pages = NULL;
	*npages = 0;
}

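/*
 * Release pages that were pinned from userspace, as opposed to pages the
 * kernel allocated itself via io_pages_map().
 */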
void io_pages_free(struct page ***pages, int npages)
{
	struct page **page_array = *pages;

	if (!page_array)
		return;

	unpin_user_pages(page_array, npages);
	kvfree(page_array);
	*pages = NULL;
}

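/*
 * Pin the user pages backing [uaddr, uaddr + len) for long-term use.
 * Returns the pinned page array on success; a partial pin is unwound and
 * reported as -EFAULT.
 */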
struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
{
	unsigned long start, end, nr_pages;
	struct page **pages;
	int ret;

	end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	start = uaddr >> PAGE_SHIFT;
	nr_pages = end - start;
	if (WARN_ON_ONCE(!nr_pages))
		return ERR_PTR(-EINVAL);

	pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
				  pages);
	/* success, pinned all pages */
	if (ret == nr_pages) {
		*npages = nr_pages;
		return pages;
	}

	/* partial pin, or didn't pin anything */
	if (ret >= 0) {
		/* if we did a partial pin, release any pages we did get */
		if (ret)
			unpin_user_pages(pages, ret);
		ret = -EFAULT;
	}
	kvfree(pages);
	return ERR_PTR(ret);
}

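/*
 * Map user-provided ring memory for kernel use: pin the user pages and
 * vmap() them into a contiguous kernel virtual address range. uaddr must
 * be page aligned.
 */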
void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
		     unsigned long uaddr, size_t size)
{
	struct page **page_array;
	int nr_pages;
	void *page_addr;

	*npages = 0;

	if (uaddr & (PAGE_SIZE - 1) || !size)
		return ERR_PTR(-EINVAL);

	nr_pages = 0;
	page_array = io_pin_pages(uaddr, size, &nr_pages);
	if (IS_ERR(page_array))
		return page_array;

	page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
	if (page_addr) {
		*pages = page_array;
		*npages = nr_pages;
		return page_addr;
	}

	io_pages_free(&page_array, nr_pages);
	return ERR_PTR(-ENOMEM);
}

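/*
 * Translate an mmap offset to the kernel address of the backing memory,
 * checking that the requested region may be mapped at all. The upper
 * bits of the offset select the ring region.
 */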
static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
					    size_t sz)
{
	struct io_ring_ctx *ctx = file->private_data;
	loff_t offset = pgoff << PAGE_SHIFT;

	switch (offset & IORING_OFF_MMAP_MASK) {
	case IORING_OFF_SQ_RING:
	case IORING_OFF_CQ_RING:
		/* Don't allow mmap if the ring was set up without it */
		if (ctx->flags & IORING_SETUP_NO_MMAP)
			return ERR_PTR(-EINVAL);
		return ctx->rings;
	case IORING_OFF_SQES:
		/* Don't allow mmap if the ring was set up without it */
		if (ctx->flags & IORING_SETUP_NO_MMAP)
			return ERR_PTR(-EINVAL);
		return ctx->sq_sqes;
	case IORING_OFF_PBUF_RING: {
		struct io_buffer_list *bl;
		unsigned int bgid;
		void *ptr;

		bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
		bl = io_pbuf_get_bl(ctx, bgid);
		if (IS_ERR(bl))
			return bl;
		ptr = bl->buf_ring;
		io_put_bl(ctx, bl);
		return ptr;
	}
	}

	return ERR_PTR(-EINVAL);
}

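/*
 * Insert the pages backing a ring region into a userspace VMA. The VMA
 * must not be expanded later, since the page array is fixed in size.
 */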
int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
			struct page **pages, int npages)
{
	unsigned long nr_pages = npages;

	vm_flags_set(vma, VM_DONTEXPAND);
	return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
}

#ifdef CONFIG_MMU

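/*
 * The ->mmap() handler: validate the offset, then hand the region it
 * selects to io_uring_mmap_pages() or io_pbuf_mmap().
 */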
__cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct io_ring_ctx *ctx = file->private_data;
	size_t sz = vma->vm_end - vma->vm_start;
	long offset = vma->vm_pgoff << PAGE_SHIFT;
	void *ptr;

	ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	switch (offset & IORING_OFF_MMAP_MASK) {
	case IORING_OFF_SQ_RING:
	case IORING_OFF_CQ_RING:
		return io_uring_mmap_pages(ctx, vma, ctx->ring_pages,
					   ctx->n_ring_pages);
	case IORING_OFF_SQES:
		return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
					   ctx->n_sqe_pages);
	case IORING_OFF_PBUF_RING:
		return io_pbuf_mmap(file, vma);
	}

	return -EINVAL;
}

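/*
 * Pick the userspace address for the mapping, honouring SHM_COLOUR cache
 * aliasing constraints on architectures that have them.
 */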
unsigned long io_uring_get_unmapped_area(struct file *filp, unsigned long addr,
					 unsigned long len, unsigned long pgoff,
					 unsigned long flags)
{
	void *ptr;

	/*
	 * Do not allow mapping to a user-provided address, to avoid breaking
	 * the aliasing rules. Userspace is not able to guess the address of
	 * a kernel kmalloc()ed memory area.
	 */
	if (addr)
		return -EINVAL;

	ptr = io_uring_validate_mmap_request(filp, pgoff, len);
	if (IS_ERR(ptr))
		return -ENOMEM;

	/*
	 * Some architectures have strong cache aliasing requirements.
	 * For such architectures we need a coherent mapping which aliases
	 * kernel memory *and* userspace memory. To achieve that:
	 * - use a NULL file pointer to reference physical memory, and
	 * - use the kernel virtual address of the shared io_uring context
	 *   (instead of the userspace-provided address, which has to be 0UL
	 *   anyway).
	 * - use the same pgoff which the get_unmapped_area() uses to
	 *   calculate the page colouring.
	 * For architectures without such aliasing requirements, the
	 * architecture will return any suitable mapping because addr is 0.
	 */
	filp = NULL;
	flags |= MAP_SHARED;
	pgoff = 0;	/* has been translated to ptr above */
#ifdef SHM_COLOUR
	addr = (uintptr_t) ptr;
	pgoff = addr >> PAGE_SHIFT;
#else
	addr = 0UL;
#endif
	return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
}

#else /* !CONFIG_MMU */

int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
	return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -EINVAL;
}

unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
{
	return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE;
}

unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr,
					 unsigned long len, unsigned long pgoff,
					 unsigned long flags)
{
	void *ptr;

	ptr = io_uring_validate_mmap_request(file, pgoff, len);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	return (unsigned long) ptr;
}

#endif /* !CONFIG_MMU */