KVM: MMU: Rename 'release_page'
[linux-2.6-block.git] / drivers / kvm / kvm_main.c
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "kvm.h"
#include "x86.h"
#include "irq.h"

#include <linux/kvm.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/vmalloc.h>
#include <linux/reboot.h>
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/anon_inodes.h>
#include <linux/profile.h>
#include <linux/kvm_para.h>
#include <linux/pagemap.h>
#include <linux/mman.h>

#include <asm/processor.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/pgtable.h>

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

DEFINE_SPINLOCK(kvm_lock);
LIST_HEAD(vm_list);

static cpumask_t cpus_hardware_enabled;

struct kmem_cache *kvm_vcpu_cache;
EXPORT_SYMBOL_GPL(kvm_vcpu_cache);

static __read_mostly struct preempt_ops kvm_preempt_ops;

static struct dentry *debugfs_dir;

static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
			   unsigned long arg);

static inline int valid_vcpu(int n)
{
	return likely(n >= 0 && n < KVM_MAX_VCPUS);
}

/*
 * Switches to the specified vcpu, until a matching vcpu_put()
 */
void vcpu_load(struct kvm_vcpu *vcpu)
{
	int cpu;

	mutex_lock(&vcpu->mutex);
	cpu = get_cpu();
	preempt_notifier_register(&vcpu->preempt_notifier);
	kvm_arch_vcpu_load(vcpu, cpu);
	put_cpu();
}

void vcpu_put(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	kvm_arch_vcpu_put(vcpu);
	preempt_notifier_unregister(&vcpu->preempt_notifier);
	preempt_enable();
	mutex_unlock(&vcpu->mutex);
}

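/*
 * A minimal usage sketch (illustrative, not a caller from this file):
 * any code that touches guest state brackets the access with
 * vcpu_load()/vcpu_put(), as the ioctl handlers below do:
 *
 *	vcpu_load(vcpu);
 *	r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
 *	vcpu_put(vcpu);
 */
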
static void ack_flush(void *_completed)
{
}

void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	int i, cpu;
	cpumask_t cpus;
	struct kvm_vcpu *vcpu;

	cpus_clear(cpus);
	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		vcpu = kvm->vcpus[i];
		if (!vcpu)
			continue;
		if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
			continue;
		cpu = vcpu->cpu;
		if (cpu != -1 && cpu != raw_smp_processor_id())
			cpu_set(cpu, cpus);
	}
	if (cpus_empty(cpus))
		return;
	++kvm->stat.remote_tlb_flush;
	smp_call_function_mask(cpus, ack_flush, NULL, 1);
}

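/*
 * Sketch of the consumer side (illustrative; the real check lives in the
 * arch vcpu_run loop, not in this file): each vcpu clears and acts on the
 * request bit set above before re-entering the guest:
 *
 *	if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 *		... flush this vcpu's TLB ...
 */
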
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
	struct page *page;
	int r;

	mutex_init(&vcpu->mutex);
	vcpu->cpu = -1;
	vcpu->kvm = kvm;
	vcpu->vcpu_id = id;
	init_waitqueue_head(&vcpu->wq);

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto fail;
	}
	vcpu->run = page_address(page);

	r = kvm_arch_vcpu_init(vcpu);
	if (r < 0)
		goto fail_free_run;
	return 0;

fail_free_run:
	free_page((unsigned long)vcpu->run);
fail:
	return r;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_init);

void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_uninit(vcpu);
	free_page((unsigned long)vcpu->run);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);

static struct kvm *kvm_create_vm(void)
{
	struct kvm *kvm = kvm_arch_create_vm();

	if (IS_ERR(kvm))
		goto out;

	kvm_io_bus_init(&kvm->pio_bus);
	mutex_init(&kvm->lock);
	kvm_io_bus_init(&kvm->mmio_bus);
	spin_lock(&kvm_lock);
	list_add(&kvm->vm_list, &vm_list);
	spin_unlock(&kvm_lock);
out:
	return kvm;
}

/*
 * Free any memory in @free but not in @dont.
 */
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
				  struct kvm_memory_slot *dont)
{
	if (!dont || free->rmap != dont->rmap)
		vfree(free->rmap);

	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
		vfree(free->dirty_bitmap);

	free->npages = 0;
	free->dirty_bitmap = NULL;
	free->rmap = NULL;
}

void kvm_free_physmem(struct kvm *kvm)
{
	int i;

	for (i = 0; i < kvm->nmemslots; ++i)
		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
}

static void kvm_destroy_vm(struct kvm *kvm)
{
	spin_lock(&kvm_lock);
	list_del(&kvm->vm_list);
	spin_unlock(&kvm_lock);
	kvm_io_bus_destroy(&kvm->pio_bus);
	kvm_io_bus_destroy(&kvm->mmio_bus);
	kvm_arch_destroy_vm(kvm);
}

static int kvm_vm_release(struct inode *inode, struct file *filp)
{
	struct kvm *kvm = filp->private_data;

	kvm_destroy_vm(kvm);
	return 0;
}

/*
 * Allocate some memory and give it an address in the guest physical address
 * space.
 *
 * Discontiguous memory is allowed, mostly for framebuffers.
 *
 * Must be called holding kvm->lock.
 */
int __kvm_set_memory_region(struct kvm *kvm,
			    struct kvm_userspace_memory_region *mem,
			    int user_alloc)
{
	int r;
	gfn_t base_gfn;
	unsigned long npages;
	unsigned long i;
	struct kvm_memory_slot *memslot;
	struct kvm_memory_slot old, new;

	r = -EINVAL;
	/* General sanity checks */
	if (mem->memory_size & (PAGE_SIZE - 1))
		goto out;
	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
		goto out;
	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
		goto out;
	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
		goto out;

	memslot = &kvm->memslots[mem->slot];
	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
	npages = mem->memory_size >> PAGE_SHIFT;

	if (!npages)
		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;

	new = old = *memslot;

	new.base_gfn = base_gfn;
	new.npages = npages;
	new.flags = mem->flags;

	/* Disallow changing a memory slot's size. */
	r = -EINVAL;
	if (npages && old.npages && npages != old.npages)
		goto out_free;

	/* Check for overlaps */
	r = -EEXIST;
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *s = &kvm->memslots[i];

		if (s == memslot)
			continue;
		if (!((base_gfn + npages <= s->base_gfn) ||
		      (base_gfn >= s->base_gfn + s->npages)))
			goto out_free;
	}

	/* Free page dirty bitmap if unneeded */
	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
		new.dirty_bitmap = NULL;

	r = -ENOMEM;

	/* Allocate if a slot is being created */
	if (npages && !new.rmap) {
		new.rmap = vmalloc(npages * sizeof(struct page *));

		if (!new.rmap)
			goto out_free;

		memset(new.rmap, 0, npages * sizeof(*new.rmap));

		new.user_alloc = user_alloc;
		new.userspace_addr = mem->userspace_addr;
	}

	/* Allocate page dirty bitmap if needed */
	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;

		new.dirty_bitmap = vmalloc(dirty_bytes);
		if (!new.dirty_bitmap)
			goto out_free;
		memset(new.dirty_bitmap, 0, dirty_bytes);
	}

	if (mem->slot >= kvm->nmemslots)
		kvm->nmemslots = mem->slot + 1;

	*memslot = new;

	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
	if (r) {
		*memslot = old;
		goto out_free;
	}

	kvm_free_physmem_slot(&old, &new);
	return 0;

out_free:
	kvm_free_physmem_slot(&new, &old);
out:
	return r;
}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);

int kvm_set_memory_region(struct kvm *kvm,
			  struct kvm_userspace_memory_region *mem,
			  int user_alloc)
{
	int r;

	mutex_lock(&kvm->lock);
	r = __kvm_set_memory_region(kvm, mem, user_alloc);
	mutex_unlock(&kvm->lock);
	return r;
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);

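/*
 * A minimal in-kernel caller sketch (illustrative values): map 1 MB of
 * userspace-allocated memory at guest physical address 0 in slot 0;
 * 'userspace_addr' stands for a hypothetical host virtual address:
 *
 *	struct kvm_userspace_memory_region mem = {
 *		.slot            = 0,
 *		.flags           = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = 0x100000,
 *		.userspace_addr  = userspace_addr,
 *	};
 *	r = kvm_set_memory_region(kvm, &mem, 1);
 */
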
int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
				   struct kvm_userspace_memory_region *mem,
				   int user_alloc)
{
	if (mem->slot >= KVM_MEMORY_SLOTS)
		return -EINVAL;
	return kvm_set_memory_region(kvm, mem, user_alloc);
}

int kvm_get_dirty_log(struct kvm *kvm,
		      struct kvm_dirty_log *log, int *is_dirty)
{
	struct kvm_memory_slot *memslot;
	int r, i;
	int n;
	unsigned long any = 0;

	r = -EINVAL;
	if (log->slot >= KVM_MEMORY_SLOTS)
		goto out;

	memslot = &kvm->memslots[log->slot];
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;

	for (i = 0; !any && i < n/sizeof(long); ++i)
		any = memslot->dirty_bitmap[i];

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	if (any)
		*is_dirty = 1;

	r = 0;
out:
	return r;
}

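/*
 * Worked example of the bitmap sizing above: for a 1 MB slot
 * (npages = 256) on a 64-bit host, ALIGN(256, 64) / 8 = 32 bytes,
 * i.e. one bit per guest page rounded up to whole longs, so userspace
 * must supply a log->dirty_bitmap buffer of at least that size.
 */
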
int is_error_page(struct page *page)
{
	return page == bad_page;
}
EXPORT_SYMBOL_GPL(is_error_page);

static inline unsigned long bad_hva(void)
{
	return PAGE_OFFSET;
}

int kvm_is_error_hva(unsigned long addr)
{
	return addr == bad_hva();
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);

gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
	int i;
	struct kvm_mem_alias *alias;

	for (i = 0; i < kvm->naliases; ++i) {
		alias = &kvm->aliases[i];
		if (gfn >= alias->base_gfn
		    && gfn < alias->base_gfn + alias->npages)
			return alias->target_gfn + gfn - alias->base_gfn;
	}
	return gfn;
}

static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	int i;

	for (i = 0; i < kvm->nmemslots; ++i) {
		struct kvm_memory_slot *memslot = &kvm->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return memslot;
	}
	return NULL;
}

struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	gfn = unalias_gfn(kvm, gfn);
	return __gfn_to_memslot(kvm, gfn);
}

int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
	int i;

	gfn = unalias_gfn(kvm, gfn);
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *memslot = &kvm->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);

static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;

	gfn = unalias_gfn(kvm, gfn);
	slot = __gfn_to_memslot(kvm, gfn);
	if (!slot)
		return bad_hva();
	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
}

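/*
 * Worked example (illustrative numbers): with a slot covering
 * base_gfn = 0x100, userspace_addr = 0x2aaaab000 and 4 KB pages,
 * gfn 0x102 maps to 0x2aaaab000 + (0x102 - 0x100) * 0x1000 =
 * 0x2aaaad000.
 */
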
/*
 * Requires current->mm->mmap_sem to be held
 */
static struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
	struct page *page[1];
	unsigned long addr;
	int npages;

	might_sleep();

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr)) {
		get_page(bad_page);
		return bad_page;
	}

	npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
				NULL);

	if (npages != 1) {
		get_page(bad_page);
		return bad_page;
	}

	return page[0];
}

struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
	struct page *page;

	down_read(&current->mm->mmap_sem);
	page = __gfn_to_page(kvm, gfn);
	up_read(&current->mm->mmap_sem);

	return page;
}
EXPORT_SYMBOL_GPL(gfn_to_page);

void kvm_release_page_clean(struct page *page)
{
	put_page(page);
}
EXPORT_SYMBOL_GPL(kvm_release_page_clean);

void kvm_release_page_dirty(struct page *page)
{
	if (!PageReserved(page))
		SetPageDirty(page);
	put_page(page);
}
EXPORT_SYMBOL_GPL(kvm_release_page_dirty);

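/*
 * A minimal usage sketch (illustrative): every successful gfn_to_page()
 * takes a reference that must be dropped with one of the helpers above;
 * _dirty is used only when the page contents were modified:
 *
 *	page = gfn_to_page(kvm, gfn);
 *	if (is_error_page(page)) {
 *		kvm_release_page_clean(page);
 *		return -EFAULT;
 *	}
 *	... read or write page contents ...
 *	kvm_release_page_dirty(page);
 */
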
static int next_segment(unsigned long len, int offset)
{
	if (len > PAGE_SIZE - offset)
		return PAGE_SIZE - offset;
	else
		return len;
}

int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
			int len)
{
	int r;
	unsigned long addr;

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return -EFAULT;
	r = copy_from_user(data, (void __user *)addr + offset, len);
	if (r)
		return -EFAULT;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page);

int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int seg;
	int offset = offset_in_page(gpa);
	int ret;

	while ((seg = next_segment(len, offset)) != 0) {
		ret = kvm_read_guest_page(kvm, gfn, data, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		data += seg;
		++gfn;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_read_guest);

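/*
 * A minimal usage sketch (illustrative; 'gpa' and the destination struct
 * are hypothetical): reads may span page boundaries, since the loop above
 * splits the copy into per-page segments:
 *
 *	struct some_guest_struct buf;
 *
 *	if (kvm_read_guest(kvm, gpa, &buf, sizeof buf) < 0)
 *		return -EFAULT;
 */
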
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
			 int offset, int len)
{
	int r;
	unsigned long addr;

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr))
		return -EFAULT;
	r = copy_to_user((void __user *)addr + offset, data, len);
	if (r)
		return -EFAULT;
	mark_page_dirty(kvm, gfn);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);

int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
		    unsigned long len)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int seg;
	int offset = offset_in_page(gpa);
	int ret;

	while ((seg = next_segment(len, offset)) != 0) {
		ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		data += seg;
		++gfn;
	}
	return 0;
}

int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
	return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_clear_guest_page);

int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	int seg;
	int offset = offset_in_page(gpa);
	int ret;

	while ((seg = next_segment(len, offset)) != 0) {
		ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
		if (ret < 0)
			return ret;
		offset = 0;
		len -= seg;
		++gfn;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);

void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *memslot;

	gfn = unalias_gfn(kvm, gfn);
	memslot = __gfn_to_memslot(kvm, gfn);
	if (memslot && memslot->dirty_bitmap) {
		unsigned long rel_gfn = gfn - memslot->base_gfn;

		/* avoid RMW */
		if (!test_bit(rel_gfn, memslot->dirty_bitmap))
			set_bit(rel_gfn, memslot->dirty_bitmap);
	}
}

/*
 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
 */
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&vcpu->wq, &wait);

	/*
	 * We will block until either an interrupt or a signal wakes us up
	 */
	while (!kvm_cpu_has_interrupt(vcpu)
	       && !signal_pending(current)
	       && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE
	       && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) {
		set_current_state(TASK_INTERRUPTIBLE);
		vcpu_put(vcpu);
		schedule();
		vcpu_load(vcpu);
	}

	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&vcpu->wq, &wait);
}

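/*
 * Sketch of the wakeup side (illustrative; the real wakeups are issued by
 * the interrupt injection paths outside this file): whoever makes the vcpu
 * runnable again wakes the queue this function sleeps on:
 *
 *	if (waitqueue_active(&vcpu->wq))
 *		wake_up_interruptible(&vcpu->wq);
 */
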
void kvm_resched(struct kvm_vcpu *vcpu)
{
	if (!need_resched())
		return;
	cond_resched();
}
EXPORT_SYMBOL_GPL(kvm_resched);

static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
				    struct kvm_interrupt *irq)
{
	if (irq->irq < 0 || irq->irq >= 256)
		return -EINVAL;
	if (irqchip_in_kernel(vcpu->kvm))
		return -ENXIO;
	vcpu_load(vcpu);

	set_bit(irq->irq, vcpu->irq_pending);
	set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);

	vcpu_put(vcpu);

	return 0;
}

static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
				    unsigned long address,
				    int *type)
{
	struct kvm_vcpu *vcpu = vma->vm_file->private_data;
	unsigned long pgoff;
	struct page *page;

	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	if (pgoff == 0)
		page = virt_to_page(vcpu->run);
	else if (pgoff == KVM_PIO_PAGE_OFFSET)
		page = virt_to_page(vcpu->pio_data);
	else
		return NOPAGE_SIGBUS;
	get_page(page);
	if (type != NULL)
		*type = VM_FAULT_MINOR;

	return page;
}

static struct vm_operations_struct kvm_vcpu_vm_ops = {
	.nopage = kvm_vcpu_nopage,
};

static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_vcpu_vm_ops;
	return 0;
}

static int kvm_vcpu_release(struct inode *inode, struct file *filp)
{
	struct kvm_vcpu *vcpu = filp->private_data;

	fput(vcpu->kvm->filp);
	return 0;
}

static struct file_operations kvm_vcpu_fops = {
	.release = kvm_vcpu_release,
	.unlocked_ioctl = kvm_vcpu_ioctl,
	.compat_ioctl = kvm_vcpu_ioctl,
	.mmap = kvm_vcpu_mmap,
};

/*
 * Allocates an inode for the vcpu.
 */
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
	int fd, r;
	struct inode *inode;
	struct file *file;

	r = anon_inode_getfd(&fd, &inode, &file,
			     "kvm-vcpu", &kvm_vcpu_fops, vcpu);
	if (r)
		return r;
	atomic_inc(&vcpu->kvm->filp->f_count);
	return fd;
}

/*
 * Creates some virtual cpus.  Good luck creating more than one.
 */
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
{
	int r;
	struct kvm_vcpu *vcpu;

	if (!valid_vcpu(n))
		return -EINVAL;

	vcpu = kvm_arch_vcpu_create(kvm, n);
	if (IS_ERR(vcpu))
		return PTR_ERR(vcpu);

	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);

	r = kvm_arch_vcpu_setup(vcpu);
	if (r)
		goto vcpu_destroy;

	mutex_lock(&kvm->lock);
	if (kvm->vcpus[n]) {
		r = -EEXIST;
		mutex_unlock(&kvm->lock);
		goto vcpu_destroy;
	}
	kvm->vcpus[n] = vcpu;
	mutex_unlock(&kvm->lock);

	/* Now it's all set up, let userspace reach it */
	r = create_vcpu_fd(vcpu);
	if (r < 0)
		goto unlink;
	return r;

unlink:
	mutex_lock(&kvm->lock);
	kvm->vcpus[n] = NULL;
	mutex_unlock(&kvm->lock);
vcpu_destroy:
	kvm_arch_vcpu_destroy(vcpu);
	return r;
}

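/*
 * Userspace-side sketch (illustrative): the fd returned above is obtained
 * and mapped as follows, with KVM_GET_VCPU_MMAP_SIZE reporting the two
 * pages (kvm_run + pio_data) served by kvm_vcpu_nopage() above:
 *
 *	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
 *	long mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 */
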
static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
{
	if (sigset) {
		sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
		vcpu->sigset_active = 1;
		vcpu->sigset = *sigset;
	} else
		vcpu->sigset_active = 0;
	return 0;
}

static long kvm_vcpu_ioctl(struct file *filp,
			   unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_RUN:
		r = -EINVAL;
		if (arg)
			goto out;
		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
		break;
	case KVM_GET_REGS: {
		struct kvm_regs kvm_regs;

		memset(&kvm_regs, 0, sizeof kvm_regs);
		r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_REGS: {
		struct kvm_regs kvm_regs;

		r = -EFAULT;
		if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
			goto out;
		r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_GET_SREGS: {
		struct kvm_sregs kvm_sregs;

		memset(&kvm_sregs, 0, sizeof kvm_sregs);
		r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_SREGS: {
		struct kvm_sregs kvm_sregs;

		r = -EFAULT;
		if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
			goto out;
		r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_TRANSLATE: {
		struct kvm_translation tr;

		r = -EFAULT;
		if (copy_from_user(&tr, argp, sizeof tr))
			goto out;
		r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &tr, sizeof tr))
			goto out;
		r = 0;
		break;
	}
	case KVM_INTERRUPT: {
		struct kvm_interrupt irq;

		r = -EFAULT;
		if (copy_from_user(&irq, argp, sizeof irq))
			goto out;
		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_DEBUG_GUEST: {
		struct kvm_debug_guest dbg;

		r = -EFAULT;
		if (copy_from_user(&dbg, argp, sizeof dbg))
			goto out;
		r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_SIGNAL_MASK: {
		struct kvm_signal_mask __user *sigmask_arg = argp;
		struct kvm_signal_mask kvm_sigmask;
		sigset_t sigset, *p;

		p = NULL;
		if (argp) {
			r = -EFAULT;
			if (copy_from_user(&kvm_sigmask, argp,
					   sizeof kvm_sigmask))
				goto out;
			r = -EINVAL;
			if (kvm_sigmask.len != sizeof sigset)
				goto out;
			r = -EFAULT;
			if (copy_from_user(&sigset, sigmask_arg->sigset,
					   sizeof sigset))
				goto out;
			p = &sigset;
		}
		/* pass NULL (clearing the mask) when no sigset was supplied */
		r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
		break;
	}
	case KVM_GET_FPU: {
		struct kvm_fpu fpu;

		memset(&fpu, 0, sizeof fpu);
		r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, &fpu);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &fpu, sizeof fpu))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_FPU: {
		struct kvm_fpu fpu;

		r = -EFAULT;
		if (copy_from_user(&fpu, argp, sizeof fpu))
			goto out;
		r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, &fpu);
		if (r)
			goto out;
		r = 0;
		break;
	}
	default:
		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
	}
out:
	return r;
}

static long kvm_vm_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_CREATE_VCPU:
		r = kvm_vm_ioctl_create_vcpu(kvm, arg);
		if (r < 0)
			goto out;
		break;
	case KVM_SET_USER_MEMORY_REGION: {
		struct kvm_userspace_memory_region kvm_userspace_mem;

		r = -EFAULT;
		if (copy_from_user(&kvm_userspace_mem, argp,
				   sizeof kvm_userspace_mem))
			goto out;

		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
		if (r)
			goto out;
		break;
	}
	case KVM_GET_DIRTY_LOG: {
		struct kvm_dirty_log log;

		r = -EFAULT;
		if (copy_from_user(&log, argp, sizeof log))
			goto out;
		r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
		if (r)
			goto out;
		break;
	}
	default:
		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
	}
out:
	return r;
}

static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
				  unsigned long address,
				  int *type)
{
	struct kvm *kvm = vma->vm_file->private_data;
	unsigned long pgoff;
	struct page *page;

	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	if (!kvm_is_visible_gfn(kvm, pgoff))
		return NOPAGE_SIGBUS;
	/* current->mm->mmap_sem is already held so call lockless version */
	page = __gfn_to_page(kvm, pgoff);
	if (is_error_page(page)) {
		kvm_release_page_clean(page);
		return NOPAGE_SIGBUS;
	}
	if (type != NULL)
		*type = VM_FAULT_MINOR;

	return page;
}

static struct vm_operations_struct kvm_vm_vm_ops = {
	.nopage = kvm_vm_nopage,
};

static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_vm_vm_ops;
	return 0;
}

static struct file_operations kvm_vm_fops = {
	.release = kvm_vm_release,
	.unlocked_ioctl = kvm_vm_ioctl,
	.compat_ioctl = kvm_vm_ioctl,
	.mmap = kvm_vm_mmap,
};

static int kvm_dev_ioctl_create_vm(void)
{
	int fd, r;
	struct inode *inode;
	struct file *file;
	struct kvm *kvm;

	kvm = kvm_create_vm();
	if (IS_ERR(kvm))
		return PTR_ERR(kvm);
	r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
	if (r) {
		kvm_destroy_vm(kvm);
		return r;
	}

	kvm->filp = file;

	return fd;
}

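/*
 * Userspace-side sketch (illustrative): a VM fd is created from the
 * /dev/kvm character device registered below:
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *	if (ioctl(kvm_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION)
 *		... bail out ...
 *	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 */
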
static long kvm_dev_ioctl(struct file *filp,
			  unsigned int ioctl, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	long r = -EINVAL;

	switch (ioctl) {
	case KVM_GET_API_VERSION:
		r = -EINVAL;
		if (arg)
			goto out;
		r = KVM_API_VERSION;
		break;
	case KVM_CREATE_VM:
		r = -EINVAL;
		if (arg)
			goto out;
		r = kvm_dev_ioctl_create_vm();
		break;
	case KVM_CHECK_EXTENSION:
		r = kvm_dev_ioctl_check_extension((long)argp);
		break;
	case KVM_GET_VCPU_MMAP_SIZE:
		r = -EINVAL;
		if (arg)
			goto out;
		r = 2 * PAGE_SIZE;
		break;
	default:
		return kvm_arch_dev_ioctl(filp, ioctl, arg);
	}
out:
	return r;
}

static struct file_operations kvm_chardev_ops = {
	.unlocked_ioctl = kvm_dev_ioctl,
	.compat_ioctl = kvm_dev_ioctl,
};

static struct miscdevice kvm_dev = {
	KVM_MINOR,
	"kvm",
	&kvm_chardev_ops,
};

static void hardware_enable(void *junk)
{
	int cpu = raw_smp_processor_id();

	if (cpu_isset(cpu, cpus_hardware_enabled))
		return;
	cpu_set(cpu, cpus_hardware_enabled);
	kvm_arch_hardware_enable(NULL);
}

static void hardware_disable(void *junk)
{
	int cpu = raw_smp_processor_id();

	if (!cpu_isset(cpu, cpus_hardware_enabled))
		return;
	cpu_clear(cpu, cpus_hardware_enabled);
	decache_vcpus_on_cpu(cpu);
	kvm_arch_hardware_disable(NULL);
}

static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
			   void *v)
{
	int cpu = (long)v;

	val &= ~CPU_TASKS_FROZEN;
	switch (val) {
	case CPU_DYING:
		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
		       cpu);
		hardware_disable(NULL);
		break;
	case CPU_UP_CANCELED:
		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
		       cpu);
		smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
		break;
	case CPU_ONLINE:
		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
		       cpu);
		smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
		break;
	}
	return NOTIFY_OK;
}

static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
		      void *v)
{
	if (val == SYS_RESTART) {
		/*
		 * Some (well, at least mine) BIOSes hang on reboot if
		 * in vmx root mode.
		 */
		printk(KERN_INFO "kvm: exiting hardware virtualization\n");
		on_each_cpu(hardware_disable, NULL, 0, 1);
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_reboot_notifier = {
	.notifier_call = kvm_reboot,
	.priority = 0,
};

void kvm_io_bus_init(struct kvm_io_bus *bus)
{
	memset(bus, 0, sizeof(*bus));
}

void kvm_io_bus_destroy(struct kvm_io_bus *bus)
{
	int i;

	for (i = 0; i < bus->dev_count; i++) {
		struct kvm_io_device *pos = bus->devs[i];

		kvm_iodevice_destructor(pos);
	}
}

struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
{
	int i;

	for (i = 0; i < bus->dev_count; i++) {
		struct kvm_io_device *pos = bus->devs[i];

		if (pos->in_range(pos, addr))
			return pos;
	}

	return NULL;
}

void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
{
	BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));

	bus->devs[bus->dev_count++] = dev;
}

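/*
 * Dispatch sketch (illustrative; the MMIO emulation path that does this
 * lives outside this file, and 'write' stands for one of the device's
 * kvm_io_device callbacks):
 *
 *	dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, gpa);
 *	if (dev)
 *		dev->write(dev, gpa, bytes, &val);
 */
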
static struct notifier_block kvm_cpu_notifier = {
	.notifier_call = kvm_cpu_hotplug,
	.priority = 20, /* must be > scheduler priority */
};

static u64 vm_stat_get(void *_offset)
{
	unsigned offset = (long)_offset;
	u64 total = 0;
	struct kvm *kvm;

	spin_lock(&kvm_lock);
	list_for_each_entry(kvm, &vm_list, vm_list)
		total += *(u32 *)((void *)kvm + offset);
	spin_unlock(&kvm_lock);
	return total;
}

DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");

static u64 vcpu_stat_get(void *_offset)
{
	unsigned offset = (long)_offset;
	u64 total = 0;
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;

	spin_lock(&kvm_lock);
	list_for_each_entry(kvm, &vm_list, vm_list)
		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
			vcpu = kvm->vcpus[i];
			if (vcpu)
				total += *(u32 *)((void *)vcpu + offset);
		}
	spin_unlock(&kvm_lock);
	return total;
}

DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");

static struct file_operations *stat_fops[] = {
	[KVM_STAT_VCPU] = &vcpu_stat_fops,
	[KVM_STAT_VM]   = &vm_stat_fops,
};

static void kvm_init_debug(void)
{
	struct kvm_stats_debugfs_item *p;

	debugfs_dir = debugfs_create_dir("kvm", NULL);
	for (p = debugfs_entries; p->name; ++p)
		p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
						(void *)(long)p->offset,
						stat_fops[p->kind]);
}

static void kvm_exit_debug(void)
{
	struct kvm_stats_debugfs_item *p;

	for (p = debugfs_entries; p->name; ++p)
		debugfs_remove(p->dentry);
	debugfs_remove(debugfs_dir);
}

static int kvm_suspend(struct sys_device *dev, pm_message_t state)
{
	hardware_disable(NULL);
	return 0;
}

static int kvm_resume(struct sys_device *dev)
{
	hardware_enable(NULL);
	return 0;
}

static struct sysdev_class kvm_sysdev_class = {
	.name = "kvm",
	.suspend = kvm_suspend,
	.resume = kvm_resume,
};

static struct sys_device kvm_sysdev = {
	.id = 0,
	.cls = &kvm_sysdev_class,
};

struct page *bad_page;

static inline
struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
{
	return container_of(pn, struct kvm_vcpu, preempt_notifier);
}

static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
{
	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);

	kvm_arch_vcpu_load(vcpu, cpu);
}

static void kvm_sched_out(struct preempt_notifier *pn,
			  struct task_struct *next)
{
	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);

	kvm_arch_vcpu_put(vcpu);
}

int kvm_init(void *opaque, unsigned int vcpu_size,
	     struct module *module)
{
	int r;
	int cpu;

	kvm_init_debug();

	r = kvm_arch_init(opaque);
	if (r)
		goto out4;

	bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);

	if (bad_page == NULL) {
		r = -ENOMEM;
		goto out;
	}

	r = kvm_arch_hardware_setup();
	if (r < 0)
		goto out;

	for_each_online_cpu(cpu) {
		smp_call_function_single(cpu,
				kvm_arch_check_processor_compat,
				&r, 0, 1);
		if (r < 0)
			goto out_free_0;
	}

	on_each_cpu(hardware_enable, NULL, 0, 1);
	r = register_cpu_notifier(&kvm_cpu_notifier);
	if (r)
		goto out_free_1;
	register_reboot_notifier(&kvm_reboot_notifier);

	r = sysdev_class_register(&kvm_sysdev_class);
	if (r)
		goto out_free_2;

	r = sysdev_register(&kvm_sysdev);
	if (r)
		goto out_free_3;

	/* A kmem cache lets us meet the alignment requirements of fx_save. */
	kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
					   __alignof__(struct kvm_vcpu),
					   0, NULL);
	if (!kvm_vcpu_cache) {
		r = -ENOMEM;
		goto out_free_4;
	}

	kvm_chardev_ops.owner = module;

	r = misc_register(&kvm_dev);
	if (r) {
		printk(KERN_ERR "kvm: misc device register failed\n");
		goto out_free;
	}

	kvm_preempt_ops.sched_in = kvm_sched_in;
	kvm_preempt_ops.sched_out = kvm_sched_out;

	return 0;

out_free:
	kmem_cache_destroy(kvm_vcpu_cache);
out_free_4:
	sysdev_unregister(&kvm_sysdev);
out_free_3:
	sysdev_class_unregister(&kvm_sysdev_class);
out_free_2:
	unregister_reboot_notifier(&kvm_reboot_notifier);
	unregister_cpu_notifier(&kvm_cpu_notifier);
out_free_1:
	on_each_cpu(hardware_disable, NULL, 0, 1);
out_free_0:
	kvm_arch_hardware_unsetup();
out:
	kvm_arch_exit();
	kvm_exit_debug();
out4:
	return r;
}
EXPORT_SYMBOL_GPL(kvm_init);

void kvm_exit(void)
{
	misc_deregister(&kvm_dev);
	kmem_cache_destroy(kvm_vcpu_cache);
	sysdev_unregister(&kvm_sysdev);
	sysdev_class_unregister(&kvm_sysdev_class);
	unregister_reboot_notifier(&kvm_reboot_notifier);
	unregister_cpu_notifier(&kvm_cpu_notifier);
	on_each_cpu(hardware_disable, NULL, 0, 1);
	kvm_arch_hardware_unsetup();
	kvm_arch_exit();
	kvm_exit_debug();
	__free_page(bad_page);
}
EXPORT_SYMBOL_GPL(kvm_exit);
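
/*
 * Usage sketch for the two entry points above (illustrative; mirrors how
 * an arch module such as kvm-intel wires itself in, with 'vmx_init',
 * 'vmx_x86_ops' and 'struct vcpu_vmx' named here only as an example):
 *
 *	static int __init vmx_init(void)
 *	{
 *		return kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
 *				THIS_MODULE);
 *	}
 *	module_init(vmx_init);
 */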