Commit | Line | Data |
---|---|---|
1c5de193 JF |
1 | /****************************************************************************** |
2 | * privcmd.c | |
3 | * | |
4 | * Interface to privileged domain-0 commands. | |
5 | * | |
6 | * Copyright (c) 2002-2004, K A Fraser, B Dragovic | |
7 | */ | |
8 | ||
283c0972 JP |
9 | #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt |
10 | ||
1c5de193 | 11 | #include <linux/kernel.h> |
d8414d3c | 12 | #include <linux/module.h> |
1c5de193 JF |
13 | #include <linux/sched.h> |
14 | #include <linux/slab.h> | |
15 | #include <linux/string.h> | |
16 | #include <linux/errno.h> | |
17 | #include <linux/mm.h> | |
18 | #include <linux/mman.h> | |
19 | #include <linux/uaccess.h> | |
20 | #include <linux/swap.h> | |
1c5de193 JF |
21 | #include <linux/highmem.h> |
22 | #include <linux/pagemap.h> | |
23 | #include <linux/seq_file.h> | |
d8414d3c | 24 | #include <linux/miscdevice.h> |
ab520be8 | 25 | #include <linux/moduleparam.h> |
1c5de193 JF |
26 | |
27 | #include <asm/pgalloc.h> | |
28 | #include <asm/pgtable.h> | |
29 | #include <asm/tlb.h> | |
30 | #include <asm/xen/hypervisor.h> | |
31 | #include <asm/xen/hypercall.h> | |
32 | ||
33 | #include <xen/xen.h> | |
34 | #include <xen/privcmd.h> | |
35 | #include <xen/interface/xen.h> | |
ab520be8 | 36 | #include <xen/interface/hvm/dm_op.h> |
1c5de193 JF |
37 | #include <xen/features.h> |
38 | #include <xen/page.h> | |
de1ef206 | 39 | #include <xen/xen-ops.h> |
d71f5139 | 40 | #include <xen/balloon.h> |
f020e290 | 41 | |
d8414d3c BB |
42 | #include "privcmd.h" |
43 | ||
44 | MODULE_LICENSE("GPL"); | |
45 | ||
d71f5139 MR |
46 | #define PRIV_VMA_LOCKED ((void *)1) |
47 | ||
ab520be8 PD |
48 | static unsigned int privcmd_dm_op_max_num = 16; |
49 | module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644); | |
50 | MODULE_PARM_DESC(dm_op_max_nr_bufs, | |
51 | "Maximum number of buffers per dm_op hypercall"); | |
52 | ||
53 | static unsigned int privcmd_dm_op_buf_max_size = 4096; | |
54 | module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint, | |
55 | 0644); | |
56 | MODULE_PARM_DESC(dm_op_buf_max_size, | |
57 | "Maximum size of a dm_op hypercall buffer"); | |
58 | ||
4610d240 PD |
59 | struct privcmd_data { |
60 | domid_t domid; | |
61 | }; | |
62 | ||
a5deabe0 ALC |
/* Forward declaration; the definition lives near the end of the file. */
static int privcmd_vma_range_is_mapped(struct vm_area_struct *vma,
				       unsigned long addr,
				       unsigned long nr_pages);
1c5de193 | 67 | |
4610d240 | 68 | static long privcmd_ioctl_hypercall(struct file *file, void __user *udata) |
1c5de193 | 69 | { |
4610d240 | 70 | struct privcmd_data *data = file->private_data; |
1c5de193 JF |
71 | struct privcmd_hypercall hypercall; |
72 | long ret; | |
73 | ||
4610d240 PD |
74 | /* Disallow arbitrary hypercalls if restricted */ |
75 | if (data->domid != DOMID_INVALID) | |
76 | return -EPERM; | |
77 | ||
1c5de193 JF |
78 | if (copy_from_user(&hypercall, udata, sizeof(hypercall))) |
79 | return -EFAULT; | |
80 | ||
fdfd811d | 81 | xen_preemptible_hcall_begin(); |
1c5de193 JF |
82 | ret = privcmd_call(hypercall.op, |
83 | hypercall.arg[0], hypercall.arg[1], | |
84 | hypercall.arg[2], hypercall.arg[3], | |
85 | hypercall.arg[4]); | |
fdfd811d | 86 | xen_preemptible_hcall_end(); |
1c5de193 JF |
87 | |
88 | return ret; | |
89 | } | |
90 | ||
91 | static void free_page_list(struct list_head *pages) | |
92 | { | |
93 | struct page *p, *n; | |
94 | ||
95 | list_for_each_entry_safe(p, n, pages, lru) | |
96 | __free_page(p); | |
97 | ||
98 | INIT_LIST_HEAD(pages); | |
99 | } | |
100 | ||
101 | /* | |
102 | * Given an array of items in userspace, return a list of pages | |
103 | * containing the data. If copying fails, either because of memory | |
104 | * allocation failure or a problem reading user memory, return an | |
105 | * error code; its up to the caller to dispose of any partial list. | |
106 | */ | |
107 | static int gather_array(struct list_head *pagelist, | |
108 | unsigned nelem, size_t size, | |
ceb90fa0 | 109 | const void __user *data) |
1c5de193 JF |
110 | { |
111 | unsigned pageidx; | |
112 | void *pagedata; | |
113 | int ret; | |
114 | ||
115 | if (size > PAGE_SIZE) | |
116 | return 0; | |
117 | ||
118 | pageidx = PAGE_SIZE; | |
119 | pagedata = NULL; /* quiet, gcc */ | |
120 | while (nelem--) { | |
121 | if (pageidx > PAGE_SIZE-size) { | |
122 | struct page *page = alloc_page(GFP_KERNEL); | |
123 | ||
124 | ret = -ENOMEM; | |
125 | if (page == NULL) | |
126 | goto fail; | |
127 | ||
128 | pagedata = page_address(page); | |
129 | ||
130 | list_add_tail(&page->lru, pagelist); | |
131 | pageidx = 0; | |
132 | } | |
133 | ||
134 | ret = -EFAULT; | |
135 | if (copy_from_user(pagedata + pageidx, data, size)) | |
136 | goto fail; | |
137 | ||
138 | data += size; | |
139 | pageidx += size; | |
140 | } | |
141 | ||
142 | ret = 0; | |
143 | ||
144 | fail: | |
145 | return ret; | |
146 | } | |
147 | ||
148 | /* | |
149 | * Call function "fn" on each element of the array fragmented | |
150 | * over a list of pages. | |
151 | */ | |
152 | static int traverse_pages(unsigned nelem, size_t size, | |
153 | struct list_head *pos, | |
154 | int (*fn)(void *data, void *state), | |
155 | void *state) | |
156 | { | |
157 | void *pagedata; | |
158 | unsigned pageidx; | |
f020e290 | 159 | int ret = 0; |
1c5de193 JF |
160 | |
161 | BUG_ON(size > PAGE_SIZE); | |
162 | ||
163 | pageidx = PAGE_SIZE; | |
164 | pagedata = NULL; /* hush, gcc */ | |
165 | ||
166 | while (nelem--) { | |
167 | if (pageidx > PAGE_SIZE-size) { | |
168 | struct page *page; | |
169 | pos = pos->next; | |
170 | page = list_entry(pos, struct page, lru); | |
171 | pagedata = page_address(page); | |
172 | pageidx = 0; | |
173 | } | |
174 | ||
175 | ret = (*fn)(pagedata + pageidx, state); | |
176 | if (ret) | |
177 | break; | |
178 | pageidx += size; | |
179 | } | |
180 | ||
181 | return ret; | |
182 | } | |
183 | ||
4e8c0c8c DV |
184 | /* |
185 | * Similar to traverse_pages, but use each page as a "block" of | |
186 | * data to be processed as one unit. | |
187 | */ | |
188 | static int traverse_pages_block(unsigned nelem, size_t size, | |
189 | struct list_head *pos, | |
190 | int (*fn)(void *data, int nr, void *state), | |
191 | void *state) | |
192 | { | |
193 | void *pagedata; | |
194 | unsigned pageidx; | |
195 | int ret = 0; | |
196 | ||
197 | BUG_ON(size > PAGE_SIZE); | |
198 | ||
199 | pageidx = PAGE_SIZE; | |
200 | ||
201 | while (nelem) { | |
202 | int nr = (PAGE_SIZE/size); | |
203 | struct page *page; | |
204 | if (nr > nelem) | |
205 | nr = nelem; | |
206 | pos = pos->next; | |
207 | page = list_entry(pos, struct page, lru); | |
208 | pagedata = page_address(page); | |
209 | ret = (*fn)(pagedata, nr, state); | |
210 | if (ret) | |
211 | break; | |
212 | nelem -= nr; | |
213 | } | |
214 | ||
215 | return ret; | |
216 | } | |
217 | ||
a13d7201 | 218 | struct mmap_gfn_state { |
1c5de193 JF |
219 | unsigned long va; |
220 | struct vm_area_struct *vma; | |
221 | domid_t domain; | |
222 | }; | |
223 | ||
a13d7201 | 224 | static int mmap_gfn_range(void *data, void *state) |
1c5de193 JF |
225 | { |
226 | struct privcmd_mmap_entry *msg = data; | |
a13d7201 | 227 | struct mmap_gfn_state *st = state; |
1c5de193 JF |
228 | struct vm_area_struct *vma = st->vma; |
229 | int rc; | |
230 | ||
231 | /* Do not allow range to wrap the address space. */ | |
232 | if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || | |
233 | ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) | |
234 | return -EINVAL; | |
235 | ||
236 | /* Range chunks must be contiguous in va space. */ | |
237 | if ((msg->va != st->va) || | |
238 | ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) | |
239 | return -EINVAL; | |
240 | ||
a13d7201 | 241 | rc = xen_remap_domain_gfn_range(vma, |
de1ef206 IC |
242 | msg->va & PAGE_MASK, |
243 | msg->mfn, msg->npages, | |
244 | vma->vm_page_prot, | |
9a032e39 | 245 | st->domain, NULL); |
1c5de193 JF |
246 | if (rc < 0) |
247 | return rc; | |
248 | ||
249 | st->va += msg->npages << PAGE_SHIFT; | |
250 | ||
251 | return 0; | |
252 | } | |
253 | ||
4610d240 | 254 | static long privcmd_ioctl_mmap(struct file *file, void __user *udata) |
1c5de193 | 255 | { |
4610d240 | 256 | struct privcmd_data *data = file->private_data; |
1c5de193 JF |
257 | struct privcmd_mmap mmapcmd; |
258 | struct mm_struct *mm = current->mm; | |
259 | struct vm_area_struct *vma; | |
260 | int rc; | |
261 | LIST_HEAD(pagelist); | |
a13d7201 | 262 | struct mmap_gfn_state state; |
1c5de193 | 263 | |
d71f5139 MR |
264 | /* We only support privcmd_ioctl_mmap_batch for auto translated. */ |
265 | if (xen_feature(XENFEAT_auto_translated_physmap)) | |
266 | return -ENOSYS; | |
267 | ||
1c5de193 JF |
268 | if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) |
269 | return -EFAULT; | |
270 | ||
4610d240 PD |
271 | /* If restriction is in place, check the domid matches */ |
272 | if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom) | |
273 | return -EPERM; | |
274 | ||
1c5de193 JF |
275 | rc = gather_array(&pagelist, |
276 | mmapcmd.num, sizeof(struct privcmd_mmap_entry), | |
277 | mmapcmd.entry); | |
278 | ||
279 | if (rc || list_empty(&pagelist)) | |
280 | goto out; | |
281 | ||
282 | down_write(&mm->mmap_sem); | |
283 | ||
284 | { | |
285 | struct page *page = list_first_entry(&pagelist, | |
286 | struct page, lru); | |
287 | struct privcmd_mmap_entry *msg = page_address(page); | |
288 | ||
289 | vma = find_vma(mm, msg->va); | |
290 | rc = -EINVAL; | |
291 | ||
a5deabe0 | 292 | if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data) |
1c5de193 | 293 | goto out_up; |
a5deabe0 | 294 | vma->vm_private_data = PRIV_VMA_LOCKED; |
1c5de193 JF |
295 | } |
296 | ||
297 | state.va = vma->vm_start; | |
298 | state.vma = vma; | |
299 | state.domain = mmapcmd.dom; | |
300 | ||
301 | rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), | |
302 | &pagelist, | |
a13d7201 | 303 | mmap_gfn_range, &state); |
1c5de193 JF |
304 | |
305 | ||
306 | out_up: | |
307 | up_write(&mm->mmap_sem); | |
308 | ||
309 | out: | |
310 | free_page_list(&pagelist); | |
311 | ||
312 | return rc; | |
313 | } | |
314 | ||
315 | struct mmap_batch_state { | |
316 | domid_t domain; | |
317 | unsigned long va; | |
318 | struct vm_area_struct *vma; | |
d71f5139 | 319 | int index; |
ceb90fa0 ALC |
320 | /* A tristate: |
321 | * 0 for no errors | |
322 | * 1 if at least one error has happened (and no | |
323 | * -ENOENT errors have happened) | |
324 | * -ENOENT if at least 1 -ENOENT has happened. | |
325 | */ | |
326 | int global_error; | |
99beae6c | 327 | int version; |
ceb90fa0 | 328 | |
a13d7201 JG |
329 | /* User-space gfn array to store errors in the second pass for V1. */ |
330 | xen_pfn_t __user *user_gfn; | |
99beae6c ALC |
331 | /* User-space int array to store errors in the second pass for V2. */ |
332 | int __user *user_err; | |
1c5de193 JF |
333 | }; |
334 | ||
a13d7201 JG |
335 | /* auto translated dom0 note: if domU being created is PV, then gfn is |
336 | * mfn(addr on bus). If it's auto xlated, then gfn is pfn (input to HAP). | |
d71f5139 | 337 | */ |
4e8c0c8c | 338 | static int mmap_batch_fn(void *data, int nr, void *state) |
1c5de193 | 339 | { |
a13d7201 | 340 | xen_pfn_t *gfnp = data; |
1c5de193 | 341 | struct mmap_batch_state *st = state; |
d71f5139 MR |
342 | struct vm_area_struct *vma = st->vma; |
343 | struct page **pages = vma->vm_private_data; | |
4e8c0c8c | 344 | struct page **cur_pages = NULL; |
ceb90fa0 ALC |
345 | int ret; |
346 | ||
d71f5139 | 347 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
4e8c0c8c | 348 | cur_pages = &pages[st->index]; |
d71f5139 | 349 | |
4e8c0c8c | 350 | BUG_ON(nr < 0); |
a13d7201 JG |
351 | ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr, |
352 | (int *)gfnp, st->vma->vm_page_prot, | |
4e8c0c8c | 353 | st->domain, cur_pages); |
1c5de193 | 354 | |
4e8c0c8c DV |
355 | /* Adjust the global_error? */ |
356 | if (ret != nr) { | |
ceb90fa0 ALC |
357 | if (ret == -ENOENT) |
358 | st->global_error = -ENOENT; | |
359 | else { | |
360 | /* Record that at least one error has happened. */ | |
361 | if (st->global_error == 0) | |
362 | st->global_error = 1; | |
363 | } | |
1c5de193 | 364 | } |
4e8c0c8c DV |
365 | st->va += PAGE_SIZE * nr; |
366 | st->index += nr; | |
1c5de193 JF |
367 | |
368 | return 0; | |
369 | } | |
370 | ||
4e8c0c8c | 371 | static int mmap_return_error(int err, struct mmap_batch_state *st) |
1c5de193 | 372 | { |
4e8c0c8c | 373 | int ret; |
ceb90fa0 | 374 | |
99beae6c | 375 | if (st->version == 1) { |
4e8c0c8c | 376 | if (err) { |
a13d7201 | 377 | xen_pfn_t gfn; |
4e8c0c8c | 378 | |
a13d7201 | 379 | ret = get_user(gfn, st->user_gfn); |
4e8c0c8c DV |
380 | if (ret < 0) |
381 | return ret; | |
382 | /* | |
383 | * V1 encodes the error codes in the 32bit top | |
a13d7201 | 384 | * nibble of the gfn (with its known |
4e8c0c8c DV |
385 | * limitations vis-a-vis 64 bit callers). |
386 | */ | |
a13d7201 | 387 | gfn |= (err == -ENOENT) ? |
4e8c0c8c DV |
388 | PRIVCMD_MMAPBATCH_PAGED_ERROR : |
389 | PRIVCMD_MMAPBATCH_MFN_ERROR; | |
a13d7201 | 390 | return __put_user(gfn, st->user_gfn++); |
4e8c0c8c | 391 | } else |
a13d7201 | 392 | st->user_gfn++; |
99beae6c | 393 | } else { /* st->version == 2 */ |
99beae6c ALC |
394 | if (err) |
395 | return __put_user(err, st->user_err++); | |
396 | else | |
397 | st->user_err++; | |
398 | } | |
399 | ||
400 | return 0; | |
1c5de193 JF |
401 | } |
402 | ||
4e8c0c8c DV |
/*
 * traverse_pages_block() callback for the error write-back pass: @data is
 * read as @nr int error codes, each forwarded to mmap_return_error().
 * Stops at, and returns, the first negative result; otherwise returns 0.
 */
static int mmap_return_errors(void *data, int nr, void *state)
{
	struct mmap_batch_state *st = state;
	int *errs = data;
	int idx = 0;

	while (idx < nr) {
		int rc = mmap_return_error(errs[idx], st);

		if (rc < 0)
			return rc;
		idx++;
	}

	return 0;
}
417 | ||
a13d7201 | 418 | /* Allocate pfns that are then mapped with gfns from foreign domid. Update |
d71f5139 MR |
419 | * the vma with the page info to use later. |
420 | * Returns: 0 if success, otherwise -errno | |
421 | */ | |
422 | static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) | |
423 | { | |
424 | int rc; | |
425 | struct page **pages; | |
426 | ||
427 | pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL); | |
428 | if (pages == NULL) | |
429 | return -ENOMEM; | |
430 | ||
81b286e0 | 431 | rc = alloc_xenballooned_pages(numpgs, pages); |
d71f5139 MR |
432 | if (rc != 0) { |
433 | pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, | |
434 | numpgs, rc); | |
435 | kfree(pages); | |
436 | return -ENOMEM; | |
437 | } | |
a5deabe0 | 438 | BUG_ON(vma->vm_private_data != NULL); |
d71f5139 MR |
439 | vma->vm_private_data = pages; |
440 | ||
441 | return 0; | |
442 | } | |
443 | ||
7cbea8dc | 444 | static const struct vm_operations_struct privcmd_vm_ops; |
f31fdf51 | 445 | |
4610d240 PD |
446 | static long privcmd_ioctl_mmap_batch( |
447 | struct file *file, void __user *udata, int version) | |
1c5de193 | 448 | { |
4610d240 | 449 | struct privcmd_data *data = file->private_data; |
1c5de193 | 450 | int ret; |
ceb90fa0 | 451 | struct privcmd_mmapbatch_v2 m; |
1c5de193 JF |
452 | struct mm_struct *mm = current->mm; |
453 | struct vm_area_struct *vma; | |
454 | unsigned long nr_pages; | |
455 | LIST_HEAD(pagelist); | |
456 | struct mmap_batch_state state; | |
457 | ||
ceb90fa0 ALC |
458 | switch (version) { |
459 | case 1: | |
460 | if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) | |
461 | return -EFAULT; | |
462 | /* Returns per-frame error in m.arr. */ | |
463 | m.err = NULL; | |
464 | if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) | |
465 | return -EFAULT; | |
466 | break; | |
467 | case 2: | |
468 | if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2))) | |
469 | return -EFAULT; | |
470 | /* Returns per-frame error code in m.err. */ | |
471 | if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) | |
472 | return -EFAULT; | |
473 | break; | |
474 | default: | |
475 | return -EINVAL; | |
476 | } | |
1c5de193 | 477 | |
4610d240 PD |
478 | /* If restriction is in place, check the domid matches */ |
479 | if (data->domid != DOMID_INVALID && data->domid != m.dom) | |
480 | return -EPERM; | |
481 | ||
5995a68a | 482 | nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE); |
1c5de193 JF |
483 | if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) |
484 | return -EINVAL; | |
485 | ||
ceb90fa0 | 486 | ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); |
1c5de193 | 487 | |
ceb90fa0 | 488 | if (ret) |
1c5de193 | 489 | goto out; |
ceb90fa0 ALC |
490 | if (list_empty(&pagelist)) { |
491 | ret = -EINVAL; | |
492 | goto out; | |
493 | } | |
494 | ||
99beae6c ALC |
495 | if (version == 2) { |
496 | /* Zero error array now to only copy back actual errors. */ | |
497 | if (clear_user(m.err, sizeof(int) * m.num)) { | |
498 | ret = -EFAULT; | |
499 | goto out; | |
500 | } | |
ceb90fa0 | 501 | } |
1c5de193 JF |
502 | |
503 | down_write(&mm->mmap_sem); | |
504 | ||
505 | vma = find_vma(mm, m.addr); | |
1c5de193 | 506 | if (!vma || |
a5deabe0 | 507 | vma->vm_ops != &privcmd_vm_ops) { |
68fa965d | 508 | ret = -EINVAL; |
a5deabe0 | 509 | goto out_unlock; |
1c5de193 | 510 | } |
a5deabe0 ALC |
511 | |
512 | /* | |
513 | * Caller must either: | |
514 | * | |
515 | * Map the whole VMA range, which will also allocate all the | |
516 | * pages required for the auto_translated_physmap case. | |
517 | * | |
518 | * Or | |
519 | * | |
520 | * Map unmapped holes left from a previous map attempt (e.g., | |
521 | * because those foreign frames were previously paged out). | |
522 | */ | |
523 | if (vma->vm_private_data == NULL) { | |
524 | if (m.addr != vma->vm_start || | |
525 | m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) { | |
526 | ret = -EINVAL; | |
527 | goto out_unlock; | |
528 | } | |
529 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | |
5995a68a | 530 | ret = alloc_empty_pages(vma, nr_pages); |
a5deabe0 ALC |
531 | if (ret < 0) |
532 | goto out_unlock; | |
533 | } else | |
534 | vma->vm_private_data = PRIV_VMA_LOCKED; | |
535 | } else { | |
536 | if (m.addr < vma->vm_start || | |
537 | m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) { | |
538 | ret = -EINVAL; | |
539 | goto out_unlock; | |
540 | } | |
541 | if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) { | |
542 | ret = -EINVAL; | |
543 | goto out_unlock; | |
d71f5139 MR |
544 | } |
545 | } | |
1c5de193 | 546 | |
ceb90fa0 ALC |
547 | state.domain = m.dom; |
548 | state.vma = vma; | |
549 | state.va = m.addr; | |
d71f5139 | 550 | state.index = 0; |
ceb90fa0 | 551 | state.global_error = 0; |
99beae6c | 552 | state.version = version; |
1c5de193 | 553 | |
5995a68a | 554 | BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0); |
ceb90fa0 | 555 | /* mmap_batch_fn guarantees ret == 0 */ |
4e8c0c8c DV |
556 | BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t), |
557 | &pagelist, mmap_batch_fn, &state)); | |
1c5de193 JF |
558 | |
559 | up_write(&mm->mmap_sem); | |
560 | ||
99beae6c ALC |
561 | if (state.global_error) { |
562 | /* Write back errors in second pass. */ | |
a13d7201 | 563 | state.user_gfn = (xen_pfn_t *)m.arr; |
99beae6c | 564 | state.user_err = m.err; |
4e8c0c8c DV |
565 | ret = traverse_pages_block(m.num, sizeof(xen_pfn_t), |
566 | &pagelist, mmap_return_errors, &state); | |
99beae6c ALC |
567 | } else |
568 | ret = 0; | |
ceb90fa0 ALC |
569 | |
570 | /* If we have not had any EFAULT-like global errors then set the global | |
571 | * error to -ENOENT if necessary. */ | |
572 | if ((ret == 0) && (state.global_error == -ENOENT)) | |
573 | ret = -ENOENT; | |
1c5de193 JF |
574 | |
575 | out: | |
576 | free_page_list(&pagelist); | |
1c5de193 | 577 | return ret; |
a5deabe0 ALC |
578 | |
579 | out_unlock: | |
580 | up_write(&mm->mmap_sem); | |
581 | goto out; | |
1c5de193 JF |
582 | } |
583 | ||
ab520be8 PD |
584 | static int lock_pages( |
585 | struct privcmd_dm_op_buf kbufs[], unsigned int num, | |
586 | struct page *pages[], unsigned int nr_pages) | |
587 | { | |
588 | unsigned int i; | |
589 | ||
590 | for (i = 0; i < num; i++) { | |
591 | unsigned int requested; | |
592 | int pinned; | |
593 | ||
594 | requested = DIV_ROUND_UP( | |
595 | offset_in_page(kbufs[i].uptr) + kbufs[i].size, | |
596 | PAGE_SIZE); | |
597 | if (requested > nr_pages) | |
598 | return -ENOSPC; | |
599 | ||
600 | pinned = get_user_pages_fast( | |
601 | (unsigned long) kbufs[i].uptr, | |
602 | requested, FOLL_WRITE, pages); | |
603 | if (pinned < 0) | |
604 | return pinned; | |
605 | ||
606 | nr_pages -= pinned; | |
607 | pages += pinned; | |
608 | } | |
609 | ||
610 | return 0; | |
611 | } | |
612 | ||
/*
 * Release the pages recorded in @pages by lock_pages().  @pages may be
 * NULL, and individual slots may be NULL when pinning stopped early.
 *
 * The pages were pinned for writing and may have been modified by the
 * hypervisor on behalf of the dm_op, which does not go through this
 * kernel's page tables; mark them dirty before dropping the reference so
 * the new contents are not lost.
 */
static void unlock_pages(struct page *pages[], unsigned int nr_pages)
{
	unsigned int i;

	if (!pages)
		return;

	for (i = 0; i < nr_pages; i++) {
		if (!pages[i])
			continue;

		if (!PageDirty(pages[i]))
			set_page_dirty_lock(pages[i]);
		put_page(pages[i]);
	}
}
625 | ||
4610d240 | 626 | static long privcmd_ioctl_dm_op(struct file *file, void __user *udata) |
ab520be8 | 627 | { |
4610d240 | 628 | struct privcmd_data *data = file->private_data; |
ab520be8 PD |
629 | struct privcmd_dm_op kdata; |
630 | struct privcmd_dm_op_buf *kbufs; | |
631 | unsigned int nr_pages = 0; | |
632 | struct page **pages = NULL; | |
633 | struct xen_dm_op_buf *xbufs = NULL; | |
634 | unsigned int i; | |
635 | long rc; | |
636 | ||
637 | if (copy_from_user(&kdata, udata, sizeof(kdata))) | |
638 | return -EFAULT; | |
639 | ||
4610d240 PD |
640 | /* If restriction is in place, check the domid matches */ |
641 | if (data->domid != DOMID_INVALID && data->domid != kdata.dom) | |
642 | return -EPERM; | |
643 | ||
ab520be8 PD |
644 | if (kdata.num == 0) |
645 | return 0; | |
646 | ||
647 | if (kdata.num > privcmd_dm_op_max_num) | |
648 | return -E2BIG; | |
649 | ||
650 | kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL); | |
651 | if (!kbufs) | |
652 | return -ENOMEM; | |
653 | ||
654 | if (copy_from_user(kbufs, kdata.ubufs, | |
655 | sizeof(*kbufs) * kdata.num)) { | |
656 | rc = -EFAULT; | |
657 | goto out; | |
658 | } | |
659 | ||
660 | for (i = 0; i < kdata.num; i++) { | |
661 | if (kbufs[i].size > privcmd_dm_op_buf_max_size) { | |
662 | rc = -E2BIG; | |
663 | goto out; | |
664 | } | |
665 | ||
666 | if (!access_ok(VERIFY_WRITE, kbufs[i].uptr, | |
667 | kbufs[i].size)) { | |
668 | rc = -EFAULT; | |
669 | goto out; | |
670 | } | |
671 | ||
672 | nr_pages += DIV_ROUND_UP( | |
673 | offset_in_page(kbufs[i].uptr) + kbufs[i].size, | |
674 | PAGE_SIZE); | |
675 | } | |
676 | ||
677 | pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); | |
678 | if (!pages) { | |
679 | rc = -ENOMEM; | |
680 | goto out; | |
681 | } | |
682 | ||
683 | xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL); | |
684 | if (!xbufs) { | |
685 | rc = -ENOMEM; | |
686 | goto out; | |
687 | } | |
688 | ||
689 | rc = lock_pages(kbufs, kdata.num, pages, nr_pages); | |
690 | if (rc) | |
691 | goto out; | |
692 | ||
693 | for (i = 0; i < kdata.num; i++) { | |
694 | set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr); | |
695 | xbufs[i].size = kbufs[i].size; | |
696 | } | |
697 | ||
698 | xen_preemptible_hcall_begin(); | |
699 | rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs); | |
700 | xen_preemptible_hcall_end(); | |
701 | ||
702 | out: | |
703 | unlock_pages(pages, nr_pages); | |
704 | kfree(xbufs); | |
705 | kfree(pages); | |
706 | kfree(kbufs); | |
707 | ||
708 | return rc; | |
709 | } | |
710 | ||
4610d240 PD |
711 | static long privcmd_ioctl_restrict(struct file *file, void __user *udata) |
712 | { | |
713 | struct privcmd_data *data = file->private_data; | |
714 | domid_t dom; | |
715 | ||
716 | if (copy_from_user(&dom, udata, sizeof(dom))) | |
717 | return -EFAULT; | |
718 | ||
719 | /* Set restriction to the specified domain, or check it matches */ | |
720 | if (data->domid == DOMID_INVALID) | |
721 | data->domid = dom; | |
722 | else if (data->domid != dom) | |
723 | return -EINVAL; | |
724 | ||
725 | return 0; | |
726 | } | |
727 | ||
1c5de193 JF |
728 | static long privcmd_ioctl(struct file *file, |
729 | unsigned int cmd, unsigned long data) | |
730 | { | |
dc9eab6f | 731 | int ret = -ENOTTY; |
1c5de193 JF |
732 | void __user *udata = (void __user *) data; |
733 | ||
734 | switch (cmd) { | |
735 | case IOCTL_PRIVCMD_HYPERCALL: | |
4610d240 | 736 | ret = privcmd_ioctl_hypercall(file, udata); |
1c5de193 JF |
737 | break; |
738 | ||
739 | case IOCTL_PRIVCMD_MMAP: | |
4610d240 | 740 | ret = privcmd_ioctl_mmap(file, udata); |
1c5de193 JF |
741 | break; |
742 | ||
743 | case IOCTL_PRIVCMD_MMAPBATCH: | |
4610d240 | 744 | ret = privcmd_ioctl_mmap_batch(file, udata, 1); |
ceb90fa0 ALC |
745 | break; |
746 | ||
747 | case IOCTL_PRIVCMD_MMAPBATCH_V2: | |
4610d240 | 748 | ret = privcmd_ioctl_mmap_batch(file, udata, 2); |
1c5de193 JF |
749 | break; |
750 | ||
ab520be8 | 751 | case IOCTL_PRIVCMD_DM_OP: |
4610d240 PD |
752 | ret = privcmd_ioctl_dm_op(file, udata); |
753 | break; | |
754 | ||
755 | case IOCTL_PRIVCMD_RESTRICT: | |
756 | ret = privcmd_ioctl_restrict(file, udata); | |
ab520be8 PD |
757 | break; |
758 | ||
1c5de193 | 759 | default: |
1c5de193 JF |
760 | break; |
761 | } | |
762 | ||
763 | return ret; | |
764 | } | |
765 | ||
4610d240 PD |
766 | static int privcmd_open(struct inode *ino, struct file *file) |
767 | { | |
768 | struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL); | |
769 | ||
770 | if (!data) | |
771 | return -ENOMEM; | |
772 | ||
773 | /* DOMID_INVALID implies no restriction */ | |
774 | data->domid = DOMID_INVALID; | |
775 | ||
776 | file->private_data = data; | |
777 | return 0; | |
778 | } | |
779 | ||
780 | static int privcmd_release(struct inode *ino, struct file *file) | |
781 | { | |
782 | struct privcmd_data *data = file->private_data; | |
783 | ||
784 | kfree(data); | |
785 | return 0; | |
786 | } | |
787 | ||
d71f5139 MR |
788 | static void privcmd_close(struct vm_area_struct *vma) |
789 | { | |
790 | struct page **pages = vma->vm_private_data; | |
c7ebf9d9 | 791 | int numpgs = vma_pages(vma); |
5995a68a | 792 | int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT; |
b6497b38 | 793 | int rc; |
d71f5139 | 794 | |
9eff37a8 | 795 | if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages) |
d71f5139 MR |
796 | return; |
797 | ||
5995a68a | 798 | rc = xen_unmap_domain_gfn_range(vma, numgfns, pages); |
b6497b38 IC |
799 | if (rc == 0) |
800 | free_xenballooned_pages(numpgs, pages); | |
801 | else | |
802 | pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n", | |
803 | numpgs, rc); | |
d71f5139 MR |
804 | kfree(pages); |
805 | } | |
806 | ||
11bac800 | 807 | static int privcmd_fault(struct vm_fault *vmf) |
1c5de193 | 808 | { |
441c7416 | 809 | printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", |
11bac800 | 810 | vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end, |
1a29d85e | 811 | vmf->pgoff, (void *)vmf->address); |
441c7416 | 812 | |
1c5de193 JF |
813 | return VM_FAULT_SIGBUS; |
814 | } | |
815 | ||
7cbea8dc | 816 | static const struct vm_operations_struct privcmd_vm_ops = { |
d71f5139 | 817 | .close = privcmd_close, |
1c5de193 JF |
818 | .fault = privcmd_fault |
819 | }; | |
820 | ||
821 | static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) | |
822 | { | |
e060e7af SS |
823 | /* DONTCOPY is essential for Xen because copy_page_range doesn't know |
824 | * how to recreate these mappings */ | |
314e51b9 KK |
825 | vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY | |
826 | VM_DONTEXPAND | VM_DONTDUMP; | |
1c5de193 JF |
827 | vma->vm_ops = &privcmd_vm_ops; |
828 | vma->vm_private_data = NULL; | |
829 | ||
830 | return 0; | |
831 | } | |
832 | ||
a5deabe0 ALC |
833 | /* |
834 | * For MMAPBATCH*. This allows asserting the singleshot mapping | |
835 | * on a per pfn/pte basis. Mapping calls that fail with ENOENT | |
836 | * can be then retried until success. | |
837 | */ | |
838 | static int is_mapped_fn(pte_t *pte, struct page *pmd_page, | |
839 | unsigned long addr, void *data) | |
840 | { | |
841 | return pte_none(*pte) ? 0 : -EBUSY; | |
842 | } | |
843 | ||
844 | static int privcmd_vma_range_is_mapped( | |
845 | struct vm_area_struct *vma, | |
846 | unsigned long addr, | |
847 | unsigned long nr_pages) | |
1c5de193 | 848 | { |
a5deabe0 ALC |
849 | return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT, |
850 | is_mapped_fn, NULL) != 0; | |
1c5de193 | 851 | } |
1c5de193 | 852 | |
d8414d3c BB |
853 | const struct file_operations xen_privcmd_fops = { |
854 | .owner = THIS_MODULE, | |
1c5de193 | 855 | .unlocked_ioctl = privcmd_ioctl, |
4610d240 PD |
856 | .open = privcmd_open, |
857 | .release = privcmd_release, | |
1c5de193 JF |
858 | .mmap = privcmd_mmap, |
859 | }; | |
d8414d3c BB |
860 | EXPORT_SYMBOL_GPL(xen_privcmd_fops); |
861 | ||
862 | static struct miscdevice privcmd_dev = { | |
863 | .minor = MISC_DYNAMIC_MINOR, | |
864 | .name = "xen/privcmd", | |
865 | .fops = &xen_privcmd_fops, | |
866 | }; | |
867 | ||
868 | static int __init privcmd_init(void) | |
869 | { | |
870 | int err; | |
871 | ||
872 | if (!xen_domain()) | |
873 | return -ENODEV; | |
874 | ||
875 | err = misc_register(&privcmd_dev); | |
876 | if (err != 0) { | |
283c0972 | 877 | pr_err("Could not register Xen privcmd device\n"); |
d8414d3c BB |
878 | return err; |
879 | } | |
880 | return 0; | |
881 | } | |
882 | ||
883 | static void __exit privcmd_exit(void) | |
884 | { | |
885 | misc_deregister(&privcmd_dev); | |
886 | } | |
887 | ||
888 | module_init(privcmd_init); | |
889 | module_exit(privcmd_exit); |