KVM: Add instruction emulation statistics
[linux-2.6-block.git] drivers/kvm/x86.c
1/*
2 * Kernel-based Virtual Machine driver for Linux
3 *
4 * derived from drivers/kvm/kvm_main.c
5 *
6 * Copyright (C) 2006 Qumranet, Inc.
7 *
8 * Authors:
9 * Avi Kivity <avi@qumranet.com>
10 * Yaniv Kamay <yaniv@qumranet.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory.
14 *
15 */
16
17#include "kvm.h"
18#include "x86.h"
19#include "x86_emulate.h"
20#include "segment_descriptor.h"
21#include "irq.h"
22
23#include <linux/kvm.h>
24#include <linux/fs.h>
25#include <linux/vmalloc.h>
26#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <asm/msr.h>
30
31#define MAX_IO_MSRS 256
32#define CR0_RESERVED_BITS \
33 (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
34 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
35 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
36#define CR4_RESERVED_BITS \
37 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
38 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
39 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
40 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
41
42#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
43#define EFER_RESERVED_BITS 0xfffffffffffff2fe
44
45#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
46
47struct kvm_x86_ops *kvm_x86_ops;
48
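/*
 * Per-vcpu statistics exported through debugfs.  Each entry maps a file
 * name to the offset of its counter inside struct kvm_vcpu (via
 * STAT_OFFSET); insn_emulation and insn_emulation_fail are the new
 * instruction emulation counters added by this patch.
 */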
49struct kvm_stats_debugfs_item debugfs_entries[] = {
50 { "pf_fixed", STAT_OFFSET(pf_fixed) },
51 { "pf_guest", STAT_OFFSET(pf_guest) },
52 { "tlb_flush", STAT_OFFSET(tlb_flush) },
53 { "invlpg", STAT_OFFSET(invlpg) },
54 { "exits", STAT_OFFSET(exits) },
55 { "io_exits", STAT_OFFSET(io_exits) },
56 { "mmio_exits", STAT_OFFSET(mmio_exits) },
57 { "signal_exits", STAT_OFFSET(signal_exits) },
58 { "irq_window", STAT_OFFSET(irq_window_exits) },
59 { "halt_exits", STAT_OFFSET(halt_exits) },
60 { "halt_wakeup", STAT_OFFSET(halt_wakeup) },
61 { "request_irq", STAT_OFFSET(request_irq_exits) },
62 { "irq_exits", STAT_OFFSET(irq_exits) },
63 { "host_state_reload", STAT_OFFSET(host_state_reload) },
64 { "efer_reload", STAT_OFFSET(efer_reload) },
65 { "fpu_reload", STAT_OFFSET(fpu_reload) },
66 { "insn_emulation", STAT_OFFSET(insn_emulation) },
67 { "insn_emulation_fail", STAT_OFFSET(insn_emulation_fail) },
68 { NULL }
69};
70
71
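/*
 * Return the base address of a host segment: read the GDT base with
 * sgdt (or recurse through the LDT when the selector's TI bit is set)
 * and assemble the base from the descriptor, including the upper 32
 * bits for 64-bit system descriptors (LDT/TSS).
 */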
72unsigned long segment_base(u16 selector)
73{
74 struct descriptor_table gdt;
75 struct segment_descriptor *d;
76 unsigned long table_base;
77 unsigned long v;
78
79 if (selector == 0)
80 return 0;
81
82 asm("sgdt %0" : "=m"(gdt));
83 table_base = gdt.base;
84
85 if (selector & 4) { /* from ldt */
86 u16 ldt_selector;
87
88 asm("sldt %0" : "=g"(ldt_selector));
89 table_base = segment_base(ldt_selector);
90 }
91 d = (struct segment_descriptor *)(table_base + (selector & ~7));
92 v = d->base_low | ((unsigned long)d->base_mid << 16) |
93 ((unsigned long)d->base_high << 24);
94#ifdef CONFIG_X86_64
95 if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
96 v |= ((unsigned long) \
97 ((struct segment_descriptor_64 *)d)->base_higher) << 32;
98#endif
99 return v;
100}
101EXPORT_SYMBOL_GPL(segment_base);
102
103u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
104{
105 if (irqchip_in_kernel(vcpu->kvm))
106 return vcpu->apic_base;
107 else
108 return vcpu->apic_base;
109}
110EXPORT_SYMBOL_GPL(kvm_get_apic_base);
111
112void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
113{
114 /* TODO: reserve bits check */
115 if (irqchip_in_kernel(vcpu->kvm))
116 kvm_lapic_set_base(vcpu, data);
117 else
118 vcpu->apic_base = data;
119}
120EXPORT_SYMBOL_GPL(kvm_set_apic_base);
121
122static void inject_gp(struct kvm_vcpu *vcpu)
123{
124 kvm_x86_ops->inject_gp(vcpu, 0);
125}
126
127/*
128 * Load the pae pdptrs. Return true if they are all valid.
129 */
130int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
131{
132 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
133 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
134 int i;
135 int ret;
136 u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
137
138 mutex_lock(&vcpu->kvm->lock);
139 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
140 offset * sizeof(u64), sizeof(pdpte));
141 if (ret < 0) {
142 ret = 0;
143 goto out;
144 }
145 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
146 if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
147 ret = 0;
148 goto out;
149 }
150 }
151 ret = 1;
152
153 memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
154out:
155 mutex_unlock(&vcpu->kvm->lock);
156
157 return ret;
158}
159
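/*
 * Emulate a guest write to CR0.  Reserved bits, NW without CD, PG
 * without PE, and invalid long-mode/PAE pdptr combinations inject #GP;
 * otherwise the new value is handed to the vendor module and the
 * shadow MMU context is rebuilt.
 */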
160void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
161{
162 if (cr0 & CR0_RESERVED_BITS) {
163 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
164 cr0, vcpu->cr0);
165 inject_gp(vcpu);
166 return;
167 }
168
169 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
170 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
171 inject_gp(vcpu);
172 return;
173 }
174
175 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
176 printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
177 "and a clear PE flag\n");
178 inject_gp(vcpu);
179 return;
180 }
181
182 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
183#ifdef CONFIG_X86_64
184 if ((vcpu->shadow_efer & EFER_LME)) {
185 int cs_db, cs_l;
186
187 if (!is_pae(vcpu)) {
188 printk(KERN_DEBUG "set_cr0: #GP, start paging "
189 "in long mode while PAE is disabled\n");
190 inject_gp(vcpu);
191 return;
192 }
193 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
194 if (cs_l) {
195 printk(KERN_DEBUG "set_cr0: #GP, start paging "
196 "in long mode while CS.L == 1\n");
197 inject_gp(vcpu);
198 return;
199
200 }
201 } else
202#endif
203 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
204 printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
205 "reserved bits\n");
206 inject_gp(vcpu);
207 return;
208 }
209
210 }
211
212 kvm_x86_ops->set_cr0(vcpu, cr0);
213 vcpu->cr0 = cr0;
214
215 mutex_lock(&vcpu->kvm->lock);
216 kvm_mmu_reset_context(vcpu);
217 mutex_unlock(&vcpu->kvm->lock);
218 return;
219}
220EXPORT_SYMBOL_GPL(set_cr0);
221
222void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
223{
224 set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
225}
226EXPORT_SYMBOL_GPL(lmsw);
227
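/*
 * Emulate a guest write to CR4, injecting #GP for reserved bits, for
 * clearing PAE while in long mode, and for attempts to set VMXE.
 */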
228void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
229{
230 if (cr4 & CR4_RESERVED_BITS) {
231 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
232 inject_gp(vcpu);
233 return;
234 }
235
236 if (is_long_mode(vcpu)) {
237 if (!(cr4 & X86_CR4_PAE)) {
238 printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
239 "in long mode\n");
240 inject_gp(vcpu);
241 return;
242 }
243 } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
244 && !load_pdptrs(vcpu, vcpu->cr3)) {
245 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
246 inject_gp(vcpu);
247 return;
248 }
249
250 if (cr4 & X86_CR4_VMXE) {
251 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
252 inject_gp(vcpu);
253 return;
254 }
255 kvm_x86_ops->set_cr4(vcpu, cr4);
256 vcpu->cr4 = cr4;
257 mutex_lock(&vcpu->kvm->lock);
258 kvm_mmu_reset_context(vcpu);
259 mutex_unlock(&vcpu->kvm->lock);
260}
261EXPORT_SYMBOL_GPL(set_cr4);
262
263void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
264{
265 if (is_long_mode(vcpu)) {
266 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
267 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
268 inject_gp(vcpu);
269 return;
270 }
271 } else {
272 if (is_pae(vcpu)) {
273 if (cr3 & CR3_PAE_RESERVED_BITS) {
274 printk(KERN_DEBUG
275 "set_cr3: #GP, reserved bits\n");
276 inject_gp(vcpu);
277 return;
278 }
279 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
280 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
281 "reserved bits\n");
282 inject_gp(vcpu);
283 return;
284 }
285 }
286 /*
287 * We don't check reserved bits in nonpae mode, because
288 * this isn't enforced, and VMware depends on this.
289 */
290 }
291
292 mutex_lock(&vcpu->kvm->lock);
293 /*
294 * Does the new cr3 value map to physical memory? (Note, we
295 * catch an invalid cr3 even in real-mode, because it would
296 * cause trouble later on when we turn on paging anyway.)
297 *
298 * A real CPU would silently accept an invalid cr3 and would
299 * attempt to use it - with largely undefined (and often hard
300 * to debug) behavior on the guest side.
301 */
302 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
303 inject_gp(vcpu);
304 else {
305 vcpu->cr3 = cr3;
306 vcpu->mmu.new_cr3(vcpu);
307 }
308 mutex_unlock(&vcpu->kvm->lock);
309}
310EXPORT_SYMBOL_GPL(set_cr3);
311
312void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
313{
314 if (cr8 & CR8_RESERVED_BITS) {
315 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
316 inject_gp(vcpu);
317 return;
318 }
319 if (irqchip_in_kernel(vcpu->kvm))
320 kvm_lapic_set_tpr(vcpu, cr8);
321 else
322 vcpu->cr8 = cr8;
323}
324EXPORT_SYMBOL_GPL(set_cr8);
325
326unsigned long get_cr8(struct kvm_vcpu *vcpu)
327{
328 if (irqchip_in_kernel(vcpu->kvm))
329 return kvm_lapic_get_cr8(vcpu);
330 else
331 return vcpu->cr8;
332}
333EXPORT_SYMBOL_GPL(get_cr8);
334
335/*
336 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
337 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
338 *
339 * This list is modified at module load time to reflect the
340 * capabilities of the host cpu.
341 */
342static u32 msrs_to_save[] = {
343 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
344 MSR_K6_STAR,
345#ifdef CONFIG_X86_64
346 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
347#endif
348 MSR_IA32_TIME_STAMP_COUNTER,
349};
350
351static unsigned num_msrs_to_save;
352
353static u32 emulated_msrs[] = {
354 MSR_IA32_MISC_ENABLE,
355};
356
357#ifdef CONFIG_X86_64
358
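/*
 * Emulate a guest write to the EFER MSR.  Reserved bits and LME changes
 * while paging is enabled inject #GP; the current LMA state is always
 * preserved in the shadow copy.
 */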
359static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
360{
361 if (efer & EFER_RESERVED_BITS) {
362 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
363 efer);
364 inject_gp(vcpu);
365 return;
366 }
367
368 if (is_paging(vcpu)
369 && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
370 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
371 inject_gp(vcpu);
372 return;
373 }
374
375 kvm_x86_ops->set_efer(vcpu, efer);
376
377 efer &= ~EFER_LMA;
378 efer |= vcpu->shadow_efer & EFER_LMA;
379
380 vcpu->shadow_efer = efer;
381}
382
383#endif
384
385/*
386 * Writes msr value into the appropriate "register".
387 * Returns 0 on success, non-0 otherwise.
388 * Assumes vcpu_load() was already called.
389 */
390int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
391{
392 return kvm_x86_ops->set_msr(vcpu, msr_index, data);
393}
394
395/*
396 * Adapt set_msr() to msr_io()'s calling convention
397 */
398static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
399{
400 return kvm_set_msr(vcpu, index, *data);
401}
402
403
404int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
405{
406 switch (msr) {
407#ifdef CONFIG_X86_64
408 case MSR_EFER:
409 set_efer(vcpu, data);
410 break;
411#endif
412 case MSR_IA32_MC0_STATUS:
413 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
414 __FUNCTION__, data);
415 break;
416 case MSR_IA32_MCG_STATUS:
417 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
418 __FUNCTION__, data);
419 break;
420 case MSR_IA32_UCODE_REV:
421 case MSR_IA32_UCODE_WRITE:
422 case 0x200 ... 0x2ff: /* MTRRs */
423 break;
424 case MSR_IA32_APICBASE:
425 kvm_set_apic_base(vcpu, data);
426 break;
427 case MSR_IA32_MISC_ENABLE:
428 vcpu->ia32_misc_enable_msr = data;
429 break;
430 default:
431 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
432 return 1;
433 }
434 return 0;
435}
436EXPORT_SYMBOL_GPL(kvm_set_msr_common);
437
438
439/*
440 * Reads an msr value (of 'msr_index') into 'pdata'.
441 * Returns 0 on success, non-0 otherwise.
442 * Assumes vcpu_load() was already called.
443 */
444int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
445{
446 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
447}
448
449int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
450{
451 u64 data;
452
453 switch (msr) {
454 case 0xc0010010: /* SYSCFG */
455 case 0xc0010015: /* HWCR */
456 case MSR_IA32_PLATFORM_ID:
457 case MSR_IA32_P5_MC_ADDR:
458 case MSR_IA32_P5_MC_TYPE:
459 case MSR_IA32_MC0_CTL:
460 case MSR_IA32_MCG_STATUS:
461 case MSR_IA32_MCG_CAP:
462 case MSR_IA32_MC0_MISC:
463 case MSR_IA32_MC0_MISC+4:
464 case MSR_IA32_MC0_MISC+8:
465 case MSR_IA32_MC0_MISC+12:
466 case MSR_IA32_MC0_MISC+16:
467 case MSR_IA32_UCODE_REV:
468 case MSR_IA32_PERF_STATUS:
469 case MSR_IA32_EBL_CR_POWERON:
470 /* MTRR registers */
471 case 0xfe:
472 case 0x200 ... 0x2ff:
473 data = 0;
474 break;
475 case 0xcd: /* fsb frequency */
476 data = 3;
477 break;
478 case MSR_IA32_APICBASE:
479 data = kvm_get_apic_base(vcpu);
480 break;
481 case MSR_IA32_MISC_ENABLE:
482 data = vcpu->ia32_misc_enable_msr;
483 break;
484#ifdef CONFIG_X86_64
485 case MSR_EFER:
486 data = vcpu->shadow_efer;
487 break;
488#endif
489 default:
490 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
491 return 1;
492 }
493 *pdata = data;
494 return 0;
495}
496EXPORT_SYMBOL_GPL(kvm_get_msr_common);
497
498/*
499 * Read or write a bunch of msrs. All parameters are kernel addresses.
500 *
501 * @return number of msrs set successfully.
502 */
503static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
504 struct kvm_msr_entry *entries,
505 int (*do_msr)(struct kvm_vcpu *vcpu,
506 unsigned index, u64 *data))
507{
508 int i;
509
510 vcpu_load(vcpu);
511
512 for (i = 0; i < msrs->nmsrs; ++i)
513 if (do_msr(vcpu, entries[i].index, &entries[i].data))
514 break;
515
516 vcpu_put(vcpu);
517
518 return i;
519}
520
521/*
522 * Read or write a bunch of msrs. Parameters are user addresses.
523 *
524 * @return number of msrs set successfully.
525 */
526static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
527 int (*do_msr)(struct kvm_vcpu *vcpu,
528 unsigned index, u64 *data),
529 int writeback)
530{
531 struct kvm_msrs msrs;
532 struct kvm_msr_entry *entries;
533 int r, n;
534 unsigned size;
535
536 r = -EFAULT;
537 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
538 goto out;
539
540 r = -E2BIG;
541 if (msrs.nmsrs >= MAX_IO_MSRS)
542 goto out;
543
544 r = -ENOMEM;
545 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
546 entries = vmalloc(size);
547 if (!entries)
548 goto out;
549
550 r = -EFAULT;
551 if (copy_from_user(entries, user_msrs->entries, size))
552 goto out_free;
553
554 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
555 if (r < 0)
556 goto out_free;
557
558 r = -EFAULT;
559 if (writeback && copy_to_user(user_msrs->entries, entries, size))
560 goto out_free;
561
562 r = n;
563
564out_free:
565 vfree(entries);
566out:
567 return r;
568}
569
570/*
571 * Make sure that a cpu that is being hot-unplugged does not have any vcpus
572 * cached on it.
573 */
574void decache_vcpus_on_cpu(int cpu)
575{
576 struct kvm *vm;
577 struct kvm_vcpu *vcpu;
578 int i;
579
580 spin_lock(&kvm_lock);
581 list_for_each_entry(vm, &vm_list, vm_list)
582 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
583 vcpu = vm->vcpus[i];
584 if (!vcpu)
585 continue;
586 /*
587 * If the vcpu is locked, then it is running on some
588 * other cpu and therefore it is not cached on the
589 * cpu in question.
590 *
591 * If it's not locked, check the last cpu it executed
592 * on.
593 */
594 if (mutex_trylock(&vcpu->mutex)) {
595 if (vcpu->cpu == cpu) {
596 kvm_x86_ops->vcpu_decache(vcpu);
597 vcpu->cpu = -1;
598 }
599 mutex_unlock(&vcpu->mutex);
600 }
601 }
602 spin_unlock(&kvm_lock);
603}
604
605int kvm_dev_ioctl_check_extension(long ext)
606{
607 int r;
608
609 switch (ext) {
610 case KVM_CAP_IRQCHIP:
611 case KVM_CAP_HLT:
612 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
613 case KVM_CAP_USER_MEMORY:
614 case KVM_CAP_SET_TSS_ADDR:
615 r = 1;
616 break;
617 default:
618 r = 0;
619 break;
620 }
621 return r;
622
623}
624
625long kvm_arch_dev_ioctl(struct file *filp,
626 unsigned int ioctl, unsigned long arg)
627{
628 void __user *argp = (void __user *)arg;
629 long r;
630
631 switch (ioctl) {
632 case KVM_GET_MSR_INDEX_LIST: {
633 struct kvm_msr_list __user *user_msr_list = argp;
634 struct kvm_msr_list msr_list;
635 unsigned n;
636
637 r = -EFAULT;
638 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
639 goto out;
640 n = msr_list.nmsrs;
641 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
642 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
643 goto out;
644 r = -E2BIG;
645 if (n < num_msrs_to_save)
646 goto out;
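  /*
   * Copy out the probed hardware MSRs first, immediately followed by
   * the always-emulated MSRs.
   */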
647 r = -EFAULT;
648 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
649 num_msrs_to_save * sizeof(u32)))
650 goto out;
651 if (copy_to_user(user_msr_list->indices
652 + num_msrs_to_save * sizeof(u32),
653 &emulated_msrs,
654 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
655 goto out;
656 r = 0;
657 break;
658 }
659 default:
660 r = -EINVAL;
661 }
662out:
663 return r;
664}
665
666void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
667{
668 kvm_x86_ops->vcpu_load(vcpu, cpu);
669}
670
671void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
672{
673 kvm_x86_ops->vcpu_put(vcpu);
674}
675
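/*
 * If the host runs with EFER.NX clear, hide the NX capability
 * (bit 20 of CPUID 0x80000001 EDX) from the guest cpuid as well.
 */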
676static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
677{
678 u64 efer;
679 int i;
680 struct kvm_cpuid_entry *e, *entry;
681
682 rdmsrl(MSR_EFER, efer);
683 entry = NULL;
684 for (i = 0; i < vcpu->cpuid_nent; ++i) {
685 e = &vcpu->cpuid_entries[i];
686 if (e->function == 0x80000001) {
687 entry = e;
688 break;
689 }
690 }
691 if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
692 entry->edx &= ~(1 << 20);
693 printk(KERN_INFO "kvm: guest NX capability removed\n");
694 }
695}
696
697static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
698 struct kvm_cpuid *cpuid,
699 struct kvm_cpuid_entry __user *entries)
700{
701 int r;
702
703 r = -E2BIG;
704 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
705 goto out;
706 r = -EFAULT;
707 if (copy_from_user(&vcpu->cpuid_entries, entries,
708 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
709 goto out;
710 vcpu->cpuid_nent = cpuid->nent;
711 cpuid_fix_nx_cap(vcpu);
712 return 0;
713
714out:
715 return r;
716}
717
718static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
719 struct kvm_lapic_state *s)
720{
721 vcpu_load(vcpu);
722 memcpy(s->regs, vcpu->apic->regs, sizeof *s);
723 vcpu_put(vcpu);
724
725 return 0;
726}
727
728static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
729 struct kvm_lapic_state *s)
730{
731 vcpu_load(vcpu);
732 memcpy(vcpu->apic->regs, s->regs, sizeof *s);
733 kvm_apic_post_state_restore(vcpu);
734 vcpu_put(vcpu);
735
736 return 0;
737}
738
739long kvm_arch_vcpu_ioctl(struct file *filp,
740 unsigned int ioctl, unsigned long arg)
741{
742 struct kvm_vcpu *vcpu = filp->private_data;
743 void __user *argp = (void __user *)arg;
744 int r;
745
746 switch (ioctl) {
747 case KVM_GET_LAPIC: {
748 struct kvm_lapic_state lapic;
749
750 memset(&lapic, 0, sizeof lapic);
751 r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
752 if (r)
753 goto out;
754 r = -EFAULT;
755 if (copy_to_user(argp, &lapic, sizeof lapic))
756 goto out;
757 r = 0;
758 break;
759 }
760 case KVM_SET_LAPIC: {
761 struct kvm_lapic_state lapic;
762
763 r = -EFAULT;
764 if (copy_from_user(&lapic, argp, sizeof lapic))
765 goto out;
766 r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);
767 if (r)
768 goto out;
769 r = 0;
770 break;
771 }
772 case KVM_SET_CPUID: {
773 struct kvm_cpuid __user *cpuid_arg = argp;
774 struct kvm_cpuid cpuid;
775
776 r = -EFAULT;
777 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
778 goto out;
779 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
780 if (r)
781 goto out;
782 break;
783 }
784 case KVM_GET_MSRS:
785 r = msr_io(vcpu, argp, kvm_get_msr, 1);
786 break;
787 case KVM_SET_MSRS:
788 r = msr_io(vcpu, argp, do_set_msr, 0);
789 break;
790 default:
791 r = -EINVAL;
792 }
793out:
794 return r;
795}
796
797static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
798{
799 int ret;
800
801 if (addr > (unsigned int)(-3 * PAGE_SIZE))
802 return -1;
803 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
804 return ret;
805}
806
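/*
 * Resize the shadow page cache for this VM.  The requested size must be
 * at least KVM_MIN_ALLOC_MMU_PAGES and is remembered in
 * n_requested_mmu_pages.
 */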
807static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
808 u32 kvm_nr_mmu_pages)
809{
810 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
811 return -EINVAL;
812
813 mutex_lock(&kvm->lock);
814
815 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
816 kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
817
818 mutex_unlock(&kvm->lock);
819 return 0;
820}
821
822static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
823{
824 return kvm->n_alloc_mmu_pages;
825}
826
827/*
828 * Set a new alias region. Aliases map a portion of physical memory into
829 * another portion. This is useful for memory windows, for example the PC
830 * VGA region.
831 */
832static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
833 struct kvm_memory_alias *alias)
834{
835 int r, n;
836 struct kvm_mem_alias *p;
837
838 r = -EINVAL;
839 /* General sanity checks */
840 if (alias->memory_size & (PAGE_SIZE - 1))
841 goto out;
842 if (alias->guest_phys_addr & (PAGE_SIZE - 1))
843 goto out;
844 if (alias->slot >= KVM_ALIAS_SLOTS)
845 goto out;
846 if (alias->guest_phys_addr + alias->memory_size
847 < alias->guest_phys_addr)
848 goto out;
849 if (alias->target_phys_addr + alias->memory_size
850 < alias->target_phys_addr)
851 goto out;
852
853 mutex_lock(&kvm->lock);
854
855 p = &kvm->aliases[alias->slot];
856 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
857 p->npages = alias->memory_size >> PAGE_SHIFT;
858 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
859
860 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
861 if (kvm->aliases[n - 1].npages)
862 break;
863 kvm->naliases = n;
864
865 kvm_mmu_zap_all(kvm);
866
867 mutex_unlock(&kvm->lock);
868
869 return 0;
870
871out:
872 return r;
873}
874
875static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
876{
877 int r;
878
879 r = 0;
880 switch (chip->chip_id) {
881 case KVM_IRQCHIP_PIC_MASTER:
882 memcpy(&chip->chip.pic,
883 &pic_irqchip(kvm)->pics[0],
884 sizeof(struct kvm_pic_state));
885 break;
886 case KVM_IRQCHIP_PIC_SLAVE:
887 memcpy(&chip->chip.pic,
888 &pic_irqchip(kvm)->pics[1],
889 sizeof(struct kvm_pic_state));
890 break;
891 case KVM_IRQCHIP_IOAPIC:
892 memcpy(&chip->chip.ioapic,
893 ioapic_irqchip(kvm),
894 sizeof(struct kvm_ioapic_state));
895 break;
896 default:
897 r = -EINVAL;
898 break;
899 }
900 return r;
901}
902
903static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
904{
905 int r;
906
907 r = 0;
908 switch (chip->chip_id) {
909 case KVM_IRQCHIP_PIC_MASTER:
910 memcpy(&pic_irqchip(kvm)->pics[0],
911 &chip->chip.pic,
912 sizeof(struct kvm_pic_state));
913 break;
914 case KVM_IRQCHIP_PIC_SLAVE:
915 memcpy(&pic_irqchip(kvm)->pics[1],
916 &chip->chip.pic,
917 sizeof(struct kvm_pic_state));
918 break;
919 case KVM_IRQCHIP_IOAPIC:
920 memcpy(ioapic_irqchip(kvm),
921 &chip->chip.ioapic,
922 sizeof(struct kvm_ioapic_state));
923 break;
924 default:
925 r = -EINVAL;
926 break;
927 }
928 kvm_pic_update_irq(pic_irqchip(kvm));
929 return r;
930}
931
932long kvm_arch_vm_ioctl(struct file *filp,
933 unsigned int ioctl, unsigned long arg)
934{
935 struct kvm *kvm = filp->private_data;
936 void __user *argp = (void __user *)arg;
937 int r = -EINVAL;
938
939 switch (ioctl) {
940 case KVM_SET_TSS_ADDR:
941 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
942 if (r < 0)
943 goto out;
944 break;
945 case KVM_SET_MEMORY_REGION: {
946 struct kvm_memory_region kvm_mem;
947 struct kvm_userspace_memory_region kvm_userspace_mem;
948
949 r = -EFAULT;
950 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
951 goto out;
952 kvm_userspace_mem.slot = kvm_mem.slot;
953 kvm_userspace_mem.flags = kvm_mem.flags;
954 kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
955 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
956 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
957 if (r)
958 goto out;
959 break;
960 }
961 case KVM_SET_NR_MMU_PAGES:
962 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
963 if (r)
964 goto out;
965 break;
966 case KVM_GET_NR_MMU_PAGES:
967 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
968 break;
969 case KVM_SET_MEMORY_ALIAS: {
970 struct kvm_memory_alias alias;
971
972 r = -EFAULT;
973 if (copy_from_user(&alias, argp, sizeof alias))
974 goto out;
975 r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
976 if (r)
977 goto out;
978 break;
979 }
980 case KVM_CREATE_IRQCHIP:
981 r = -ENOMEM;
982 kvm->vpic = kvm_create_pic(kvm);
983 if (kvm->vpic) {
984 r = kvm_ioapic_init(kvm);
985 if (r) {
986 kfree(kvm->vpic);
987 kvm->vpic = NULL;
988 goto out;
989 }
990 } else
991 goto out;
992 break;
993 case KVM_IRQ_LINE: {
994 struct kvm_irq_level irq_event;
995
996 r = -EFAULT;
997 if (copy_from_user(&irq_event, argp, sizeof irq_event))
998 goto out;
999 if (irqchip_in_kernel(kvm)) {
1000 mutex_lock(&kvm->lock);
1001 if (irq_event.irq < 16)
1002 kvm_pic_set_irq(pic_irqchip(kvm),
1003 irq_event.irq,
1004 irq_event.level);
1005 kvm_ioapic_set_irq(kvm->vioapic,
1006 irq_event.irq,
1007 irq_event.level);
1008 mutex_unlock(&kvm->lock);
1009 r = 0;
1010 }
1011 break;
1012 }
1013 case KVM_GET_IRQCHIP: {
1014 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
1015 struct kvm_irqchip chip;
1016
1017 r = -EFAULT;
1018 if (copy_from_user(&chip, argp, sizeof chip))
1019 goto out;
1020 r = -ENXIO;
1021 if (!irqchip_in_kernel(kvm))
1022 goto out;
1023 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
1024 if (r)
1025 goto out;
1026 r = -EFAULT;
1027 if (copy_to_user(argp, &chip, sizeof chip))
1028 goto out;
1029 r = 0;
1030 break;
1031 }
1032 case KVM_SET_IRQCHIP: {
1033 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
1034 struct kvm_irqchip chip;
1035
1036 r = -EFAULT;
1037 if (copy_from_user(&chip, argp, sizeof chip))
1038 goto out;
1039 r = -ENXIO;
1040 if (!irqchip_in_kernel(kvm))
1041 goto out;
1042 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
1043 if (r)
1044 goto out;
1045 r = 0;
1046 break;
1047 }
1048 default:
1049 ;
1050 }
1051out:
1052 return r;
1053}
1054
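/*
 * Probe each entry of msrs_to_save with rdmsr_safe() and compact the
 * array so it only lists MSRs actually present on this host.
 */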
1055static void kvm_init_msr_list(void)
1056{
1057 u32 dummy[2];
1058 unsigned i, j;
1059
1060 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
1061 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
1062 continue;
1063 if (j < i)
1064 msrs_to_save[j] = msrs_to_save[i];
1065 j++;
1066 }
1067 num_msrs_to_save = j;
1068}
1069
1070/*
1071 * Only the apic needs an MMIO device hook, so shortcut now.
1072 */
1073static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
1074 gpa_t addr)
1075{
1076 struct kvm_io_device *dev;
1077
1078 if (vcpu->apic) {
1079 dev = &vcpu->apic->dev;
1080 if (dev->in_range(dev, addr))
1081 return dev;
1082 }
1083 return NULL;
1084}
1085
1086
1087static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
1088 gpa_t addr)
1089{
1090 struct kvm_io_device *dev;
1091
1092 dev = vcpu_find_pervcpu_dev(vcpu, addr);
1093 if (dev == NULL)
1094 dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
1095 return dev;
1096}
1097
1098int emulator_read_std(unsigned long addr,
1099 void *val,
1100 unsigned int bytes,
1101 struct kvm_vcpu *vcpu)
1102{
1103 void *data = val;
1104
1105 while (bytes) {
1106 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1107 unsigned offset = addr & (PAGE_SIZE-1);
1108 unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
1109 int ret;
1110
1111 if (gpa == UNMAPPED_GVA)
1112 return X86EMUL_PROPAGATE_FAULT;
1113 ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
1114 if (ret < 0)
1115 return X86EMUL_UNHANDLEABLE;
1116
1117 bytes -= tocopy;
1118 data += tocopy;
1119 addr += tocopy;
1120 }
1121
1122 return X86EMUL_CONTINUE;
1123}
1124EXPORT_SYMBOL_GPL(emulator_read_std);
1125
1126static int emulator_write_std(unsigned long addr,
1127 const void *val,
1128 unsigned int bytes,
1129 struct kvm_vcpu *vcpu)
1130{
1131 pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes);
1132 return X86EMUL_UNHANDLEABLE;
1133}
1134
1135static int emulator_read_emulated(unsigned long addr,
1136 void *val,
1137 unsigned int bytes,
1138 struct kvm_vcpu *vcpu)
1139{
1140 struct kvm_io_device *mmio_dev;
1141 gpa_t gpa;
1142
1143 if (vcpu->mmio_read_completed) {
1144 memcpy(val, vcpu->mmio_data, bytes);
1145 vcpu->mmio_read_completed = 0;
1146 return X86EMUL_CONTINUE;
1147 }
1148
1149 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1150
1151 /* For APIC access vmexit */
1152 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
1153 goto mmio;
1154
1155 if (emulator_read_std(addr, val, bytes, vcpu)
1156 == X86EMUL_CONTINUE)
1157 return X86EMUL_CONTINUE;
1158 if (gpa == UNMAPPED_GVA)
1159 return X86EMUL_PROPAGATE_FAULT;
1160
1161mmio:
1162 /*
1163 * Is this MMIO handled locally?
1164 */
1165 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1166 if (mmio_dev) {
1167 kvm_iodevice_read(mmio_dev, gpa, bytes, val);
1168 return X86EMUL_CONTINUE;
1169 }
1170
1171 vcpu->mmio_needed = 1;
1172 vcpu->mmio_phys_addr = gpa;
1173 vcpu->mmio_size = bytes;
1174 vcpu->mmio_is_write = 0;
1175
1176 return X86EMUL_UNHANDLEABLE;
1177}
1178
1179static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1180 const void *val, int bytes)
1181{
1182 int ret;
1183
1184 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
1185 if (ret < 0)
1186 return 0;
1187 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
1188 return 1;
1189}
1190
1191static int emulator_write_emulated_onepage(unsigned long addr,
1192 const void *val,
1193 unsigned int bytes,
1194 struct kvm_vcpu *vcpu)
1195{
1196 struct kvm_io_device *mmio_dev;
1197 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1198
1199 if (gpa == UNMAPPED_GVA) {
1200 kvm_x86_ops->inject_page_fault(vcpu, addr, 2);
1201 return X86EMUL_PROPAGATE_FAULT;
1202 }
1203
1204 /* For APIC access vmexit */
1205 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
1206 goto mmio;
1207
1208 if (emulator_write_phys(vcpu, gpa, val, bytes))
1209 return X86EMUL_CONTINUE;
1210
1211mmio:
1212 /*
1213 * Is this MMIO handled locally?
1214 */
1215 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1216 if (mmio_dev) {
1217 kvm_iodevice_write(mmio_dev, gpa, bytes, val);
1218 return X86EMUL_CONTINUE;
1219 }
1220
1221 vcpu->mmio_needed = 1;
1222 vcpu->mmio_phys_addr = gpa;
1223 vcpu->mmio_size = bytes;
1224 vcpu->mmio_is_write = 1;
1225 memcpy(vcpu->mmio_data, val, bytes);
1226
1227 return X86EMUL_CONTINUE;
1228}
1229
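/*
 * Split emulated writes that cross a page boundary into two
 * single-page writes so each one stays within a contiguous guest page.
 */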
1230int emulator_write_emulated(unsigned long addr,
1231 const void *val,
1232 unsigned int bytes,
1233 struct kvm_vcpu *vcpu)
1234{
1235 /* Crossing a page boundary? */
1236 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
1237 int rc, now;
1238
1239 now = -addr & ~PAGE_MASK;
1240 rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
1241 if (rc != X86EMUL_CONTINUE)
1242 return rc;
1243 addr += now;
1244 val += now;
1245 bytes -= now;
1246 }
1247 return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
1248}
1249EXPORT_SYMBOL_GPL(emulator_write_emulated);
1250
1251static int emulator_cmpxchg_emulated(unsigned long addr,
1252 const void *old,
1253 const void *new,
1254 unsigned int bytes,
1255 struct kvm_vcpu *vcpu)
1256{
1257 static int reported;
1258
1259 if (!reported) {
1260 reported = 1;
1261 printk(KERN_WARNING "kvm: emulating exchange as write\n");
1262 }
1263 return emulator_write_emulated(addr, new, bytes, vcpu);
1264}
1265
1266static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
1267{
1268 return kvm_x86_ops->get_segment_base(vcpu, seg);
1269}
1270
1271int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
1272{
1273 return X86EMUL_CONTINUE;
1274}
1275
1276int emulate_clts(struct kvm_vcpu *vcpu)
1277{
1278 kvm_x86_ops->set_cr0(vcpu, vcpu->cr0 & ~X86_CR0_TS);
1279 return X86EMUL_CONTINUE;
1280}
1281
1282int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
1283{
1284 struct kvm_vcpu *vcpu = ctxt->vcpu;
1285
1286 switch (dr) {
1287 case 0 ... 3:
1288 *dest = kvm_x86_ops->get_dr(vcpu, dr);
1289 return X86EMUL_CONTINUE;
1290 default:
1291 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
1292 return X86EMUL_UNHANDLEABLE;
1293 }
1294}
1295
1296int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
1297{
1298 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
1299 int exception;
1300
1301 kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
1302 if (exception) {
1303 /* FIXME: better handling */
1304 return X86EMUL_UNHANDLEABLE;
1305 }
1306 return X86EMUL_CONTINUE;
1307}
1308
1309void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
1310{
1311 static int reported;
1312 u8 opcodes[4];
1313 unsigned long rip = vcpu->rip;
1314 unsigned long rip_linear;
1315
1316 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
1317
1318 if (reported)
1319 return;
1320
1321 emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
1322
1323 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
1324 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
1325 reported = 1;
1326}
1327EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
1328
1329struct x86_emulate_ops emulate_ops = {
1330 .read_std = emulator_read_std,
1331 .write_std = emulator_write_std,
1332 .read_emulated = emulator_read_emulated,
1333 .write_emulated = emulator_write_emulated,
1334 .cmpxchg_emulated = emulator_cmpxchg_emulated,
1335};
1336
1337int emulate_instruction(struct kvm_vcpu *vcpu,
1338 struct kvm_run *run,
1339 unsigned long cr2,
1340 u16 error_code,
1341 int no_decode)
1342{
1343 int r;
1344
1345 vcpu->mmio_fault_cr2 = cr2;
1346 kvm_x86_ops->cache_regs(vcpu);
1347
1348 vcpu->mmio_is_write = 0;
1349 vcpu->pio.string = 0;
1350
1351 if (!no_decode) {
1352 int cs_db, cs_l;
1353 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
1354
1355 vcpu->emulate_ctxt.vcpu = vcpu;
1356 vcpu->emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
1357 vcpu->emulate_ctxt.cr2 = cr2;
1358 vcpu->emulate_ctxt.mode =
1359 (vcpu->emulate_ctxt.eflags & X86_EFLAGS_VM)
1360 ? X86EMUL_MODE_REAL : cs_l
1361 ? X86EMUL_MODE_PROT64 : cs_db
1362 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
1363
1364 if (vcpu->emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
1365 vcpu->emulate_ctxt.cs_base = 0;
1366 vcpu->emulate_ctxt.ds_base = 0;
1367 vcpu->emulate_ctxt.es_base = 0;
1368 vcpu->emulate_ctxt.ss_base = 0;
1369 } else {
1370 vcpu->emulate_ctxt.cs_base =
1371 get_segment_base(vcpu, VCPU_SREG_CS);
1372 vcpu->emulate_ctxt.ds_base =
1373 get_segment_base(vcpu, VCPU_SREG_DS);
1374 vcpu->emulate_ctxt.es_base =
1375 get_segment_base(vcpu, VCPU_SREG_ES);
1376 vcpu->emulate_ctxt.ss_base =
1377 get_segment_base(vcpu, VCPU_SREG_SS);
1378 }
1379
1380 vcpu->emulate_ctxt.gs_base =
1381 get_segment_base(vcpu, VCPU_SREG_GS);
1382 vcpu->emulate_ctxt.fs_base =
1383 get_segment_base(vcpu, VCPU_SREG_FS);
1384
1385 r = x86_decode_insn(&vcpu->emulate_ctxt, &emulate_ops);
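  /*
   * Instruction emulation statistics: count every attempted decode,
   * and separately the decodes that failed.
   */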
1386 ++vcpu->stat.insn_emulation;
1387 if (r) {
1388 ++vcpu->stat.insn_emulation_fail;
1389 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
1390 return EMULATE_DONE;
1391 return EMULATE_FAIL;
1392 }
1393 }
1394
1395 r = x86_emulate_insn(&vcpu->emulate_ctxt, &emulate_ops);
1396
1397 if (vcpu->pio.string)
1398 return EMULATE_DO_MMIO;
1399
1400 if ((r || vcpu->mmio_is_write) && run) {
1401 run->exit_reason = KVM_EXIT_MMIO;
1402 run->mmio.phys_addr = vcpu->mmio_phys_addr;
1403 memcpy(run->mmio.data, vcpu->mmio_data, 8);
1404 run->mmio.len = vcpu->mmio_size;
1405 run->mmio.is_write = vcpu->mmio_is_write;
1406 }
1407
1408 if (r) {
1409 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
1410 return EMULATE_DONE;
1411 if (!vcpu->mmio_needed) {
1412 kvm_report_emulation_failure(vcpu, "mmio");
1413 return EMULATE_FAIL;
1414 }
1415 return EMULATE_DO_MMIO;
1416 }
1417
1418 kvm_x86_ops->decache_regs(vcpu);
1419 kvm_x86_ops->set_rflags(vcpu, vcpu->emulate_ctxt.eflags);
1420
1421 if (vcpu->mmio_is_write) {
1422 vcpu->mmio_needed = 0;
1423 return EMULATE_DO_MMIO;
1424 }
1425
1426 return EMULATE_DONE;
1427}
1428EXPORT_SYMBOL_GPL(emulate_instruction);
1429
1430static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
1431{
1432 int i;
1433
1434 for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
1435 if (vcpu->pio.guest_pages[i]) {
1436 kvm_release_page(vcpu->pio.guest_pages[i]);
1437 vcpu->pio.guest_pages[i] = NULL;
1438 }
1439}
1440
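/*
 * Copy between the vcpu's pio_data page and the pinned guest pages
 * backing a string I/O operation, using a temporary vmap() of the
 * guest pages.
 */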
1441static int pio_copy_data(struct kvm_vcpu *vcpu)
1442{
1443 void *p = vcpu->pio_data;
1444 void *q;
1445 unsigned bytes;
1446 int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
1447
1448 q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
1449 PAGE_KERNEL);
1450 if (!q) {
1451 free_pio_guest_pages(vcpu);
1452 return -ENOMEM;
1453 }
1454 q += vcpu->pio.guest_page_offset;
1455 bytes = vcpu->pio.size * vcpu->pio.cur_count;
1456 if (vcpu->pio.in)
1457 memcpy(q, p, bytes);
1458 else
1459 memcpy(p, q, bytes);
1460 q -= vcpu->pio.guest_page_offset;
1461 vunmap(q);
1462 free_pio_guest_pages(vcpu);
1463 return 0;
1464}
1465
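/*
 * Finish a pending PIO operation.  For a completed IN, copy the result
 * into RAX; for string I/O, copy between pio_data and the guest buffer
 * and advance RSI/RDI and RCX according to direction, size and repeat
 * count.
 */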
1466int complete_pio(struct kvm_vcpu *vcpu)
1467{
1468 struct kvm_pio_request *io = &vcpu->pio;
1469 long delta;
1470 int r;
1471
1472 kvm_x86_ops->cache_regs(vcpu);
1473
1474 if (!io->string) {
1475 if (io->in)
1476 memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
1477 io->size);
1478 } else {
1479 if (io->in) {
1480 r = pio_copy_data(vcpu);
1481 if (r) {
1482 kvm_x86_ops->cache_regs(vcpu);
1483 return r;
1484 }
1485 }
1486
1487 delta = 1;
1488 if (io->rep) {
1489 delta *= io->cur_count;
1490 /*
1491 * The size of the register should really depend on
1492 * current address size.
1493 */
1494 vcpu->regs[VCPU_REGS_RCX] -= delta;
1495 }
1496 if (io->down)
1497 delta = -delta;
1498 delta *= io->size;
1499 if (io->in)
1500 vcpu->regs[VCPU_REGS_RDI] += delta;
1501 else
1502 vcpu->regs[VCPU_REGS_RSI] += delta;
1503 }
1504
1505 kvm_x86_ops->decache_regs(vcpu);
1506
1507 io->count -= io->cur_count;
1508 io->cur_count = 0;
1509
1510 return 0;
1511}
1512
1513static void kernel_pio(struct kvm_io_device *pio_dev,
1514 struct kvm_vcpu *vcpu,
1515 void *pd)
1516{
1517 /* TODO: String I/O for in kernel device */
1518
1519 mutex_lock(&vcpu->kvm->lock);
1520 if (vcpu->pio.in)
1521 kvm_iodevice_read(pio_dev, vcpu->pio.port,
1522 vcpu->pio.size,
1523 pd);
1524 else
1525 kvm_iodevice_write(pio_dev, vcpu->pio.port,
1526 vcpu->pio.size,
1527 pd);
1528 mutex_unlock(&vcpu->kvm->lock);
1529}
1530
1531static void pio_string_write(struct kvm_io_device *pio_dev,
1532 struct kvm_vcpu *vcpu)
1533{
1534 struct kvm_pio_request *io = &vcpu->pio;
1535 void *pd = vcpu->pio_data;
1536 int i;
1537
1538 mutex_lock(&vcpu->kvm->lock);
1539 for (i = 0; i < io->cur_count; i++) {
1540 kvm_iodevice_write(pio_dev, io->port,
1541 io->size,
1542 pd);
1543 pd += io->size;
1544 }
1545 mutex_unlock(&vcpu->kvm->lock);
1546}
1547
1548static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
1549 gpa_t addr)
1550{
1551 return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
1552}
1553
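/*
 * Emulate a single (non-string) IN/OUT.  The kvm_run I/O exit record is
 * always filled in; if an in-kernel device claims the port, the access
 * is completed here and no exit to userspace is needed (returns 1).
 */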
1554int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1555 int size, unsigned port)
1556{
1557 struct kvm_io_device *pio_dev;
1558
1559 vcpu->run->exit_reason = KVM_EXIT_IO;
1560 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1561 vcpu->run->io.size = vcpu->pio.size = size;
1562 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1563 vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1;
1564 vcpu->run->io.port = vcpu->pio.port = port;
1565 vcpu->pio.in = in;
1566 vcpu->pio.string = 0;
1567 vcpu->pio.down = 0;
1568 vcpu->pio.guest_page_offset = 0;
1569 vcpu->pio.rep = 0;
1570
1571 kvm_x86_ops->cache_regs(vcpu);
1572 memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
1573 kvm_x86_ops->decache_regs(vcpu);
1574
1575 kvm_x86_ops->skip_emulated_instruction(vcpu);
1576
1577 pio_dev = vcpu_find_pio_dev(vcpu, port);
1578 if (pio_dev) {
1579 kernel_pio(pio_dev, vcpu, vcpu->pio_data);
1580 complete_pio(vcpu);
1581 return 1;
1582 }
1583 return 0;
1584}
1585EXPORT_SYMBOL_GPL(kvm_emulate_pio);
1586
1587int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1588 int size, unsigned long count, int down,
1589 gva_t address, int rep, unsigned port)
1590{
1591 unsigned now, in_page;
1592 int i, ret = 0;
1593 int nr_pages = 1;
1594 struct page *page;
1595 struct kvm_io_device *pio_dev;
1596
1597 vcpu->run->exit_reason = KVM_EXIT_IO;
1598 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1599 vcpu->run->io.size = vcpu->pio.size = size;
1600 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1601 vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count;
1602 vcpu->run->io.port = vcpu->pio.port = port;
1603 vcpu->pio.in = in;
1604 vcpu->pio.string = 1;
1605 vcpu->pio.down = down;
1606 vcpu->pio.guest_page_offset = offset_in_page(address);
1607 vcpu->pio.rep = rep;
1608
1609 if (!count) {
1610 kvm_x86_ops->skip_emulated_instruction(vcpu);
1611 return 1;
1612 }
1613
1614 if (!down)
1615 in_page = PAGE_SIZE - offset_in_page(address);
1616 else
1617 in_page = offset_in_page(address) + size;
1618 now = min(count, (unsigned long)in_page / size);
1619 if (!now) {
1620 /*
1621 * String I/O straddles page boundary. Pin two guest pages
1622 * so that we satisfy atomicity constraints. Do just one
1623 * transaction to avoid complexity.
1624 */
1625 nr_pages = 2;
1626 now = 1;
1627 }
1628 if (down) {
1629 /*
1630 * String I/O in reverse. Yuck. Kill the guest, fix later.
1631 */
1632 pr_unimpl(vcpu, "guest string pio down\n");
1633 inject_gp(vcpu);
1634 return 1;
1635 }
1636 vcpu->run->io.count = now;
1637 vcpu->pio.cur_count = now;
1638
1639 if (vcpu->pio.cur_count == vcpu->pio.count)
1640 kvm_x86_ops->skip_emulated_instruction(vcpu);
1641
1642 for (i = 0; i < nr_pages; ++i) {
1643 mutex_lock(&vcpu->kvm->lock);
1644 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
1645 vcpu->pio.guest_pages[i] = page;
1646 mutex_unlock(&vcpu->kvm->lock);
1647 if (!page) {
1648 inject_gp(vcpu);
1649 free_pio_guest_pages(vcpu);
1650 return 1;
1651 }
1652 }
1653
1654 pio_dev = vcpu_find_pio_dev(vcpu, port);
1655 if (!vcpu->pio.in) {
1656 /* string PIO write */
1657 ret = pio_copy_data(vcpu);
1658 if (ret >= 0 && pio_dev) {
1659 pio_string_write(pio_dev, vcpu);
1660 complete_pio(vcpu);
1661 if (vcpu->pio.count == 0)
1662 ret = 1;
1663 }
1664 } else if (pio_dev)
1665 pr_unimpl(vcpu, "no string pio read support yet, "
1666 "port %x size %d count %ld\n",
1667 port, size, count);
1668
1669 return ret;
1670}
1671EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
1672
1673int kvm_arch_init(void *opaque)
1674{
1675 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
1676
1677 kvm_init_msr_list();
1678
1679 if (kvm_x86_ops) {
1680 printk(KERN_ERR "kvm: already loaded the other module\n");
1681 return -EEXIST;
1682 }
1683
1684 if (!ops->cpu_has_kvm_support()) {
1685 printk(KERN_ERR "kvm: no hardware support\n");
1686 return -EOPNOTSUPP;
1687 }
1688 if (ops->disabled_by_bios()) {
1689 printk(KERN_ERR "kvm: disabled by bios\n");
1690 return -EOPNOTSUPP;
1691 }
1692
1693 kvm_x86_ops = ops;
1694
1695 return 0;
1696}
1697
1698void kvm_arch_exit(void)
1699{
1700 kvm_x86_ops = NULL;
1701}
1702
1703int kvm_emulate_halt(struct kvm_vcpu *vcpu)
1704{
1705 ++vcpu->stat.halt_exits;
1706 if (irqchip_in_kernel(vcpu->kvm)) {
1707 vcpu->mp_state = VCPU_MP_STATE_HALTED;
1708 kvm_vcpu_block(vcpu);
1709 if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
1710 return -EINTR;
1711 return 1;
1712 } else {
1713 vcpu->run->exit_reason = KVM_EXIT_HLT;
1714 return 0;
1715 }
1716}
1717EXPORT_SYMBOL_GPL(kvm_emulate_halt);
1718
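/*
 * Hypercall calling convention: number in RAX, arguments in RBX, RCX,
 * RDX and RSI, result returned in RAX.  Outside long mode everything is
 * truncated to 32 bits.  No hypercalls are implemented yet, so all of
 * them return -KVM_ENOSYS.
 */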
1719int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
1720{
1721 unsigned long nr, a0, a1, a2, a3, ret;
1722
1723 kvm_x86_ops->cache_regs(vcpu);
1724
1725 nr = vcpu->regs[VCPU_REGS_RAX];
1726 a0 = vcpu->regs[VCPU_REGS_RBX];
1727 a1 = vcpu->regs[VCPU_REGS_RCX];
1728 a2 = vcpu->regs[VCPU_REGS_RDX];
1729 a3 = vcpu->regs[VCPU_REGS_RSI];
1730
1731 if (!is_long_mode(vcpu)) {
1732 nr &= 0xFFFFFFFF;
1733 a0 &= 0xFFFFFFFF;
1734 a1 &= 0xFFFFFFFF;
1735 a2 &= 0xFFFFFFFF;
1736 a3 &= 0xFFFFFFFF;
1737 }
1738
1739 switch (nr) {
1740 default:
1741 ret = -KVM_ENOSYS;
1742 break;
1743 }
1744 vcpu->regs[VCPU_REGS_RAX] = ret;
1745 kvm_x86_ops->decache_regs(vcpu);
1746 return 0;
1747}
1748EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
1749
1750int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
1751{
1752 char instruction[3];
1753 int ret = 0;
1754
1755 mutex_lock(&vcpu->kvm->lock);
1756
1757 /*
1758 * Blow out the MMU to ensure that no other VCPU has an active mapping
1759 * to ensure that the updated hypercall appears atomically across all
1760 * VCPUs.
1761 */
1762 kvm_mmu_zap_all(vcpu->kvm);
1763
1764 kvm_x86_ops->cache_regs(vcpu);
1765 kvm_x86_ops->patch_hypercall(vcpu, instruction);
1766 if (emulator_write_emulated(vcpu->rip, instruction, 3, vcpu)
1767 != X86EMUL_CONTINUE)
1768 ret = -EFAULT;
1769
1770 mutex_unlock(&vcpu->kvm->lock);
1771
1772 return ret;
1773}
1774
1775static u64 mk_cr_64(u64 curr_cr, u32 new_val)
1776{
1777 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
1778}
1779
1780void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
1781{
1782 struct descriptor_table dt = { limit, base };
1783
1784 kvm_x86_ops->set_gdt(vcpu, &dt);
1785}
1786
1787void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
1788{
1789 struct descriptor_table dt = { limit, base };
1790
1791 kvm_x86_ops->set_idt(vcpu, &dt);
1792}
1793
1794void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
1795 unsigned long *rflags)
1796{
1797 lmsw(vcpu, msw);
1798 *rflags = kvm_x86_ops->get_rflags(vcpu);
1799}
1800
1801unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
1802{
1803 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
1804 switch (cr) {
1805 case 0:
1806 return vcpu->cr0;
1807 case 2:
1808 return vcpu->cr2;
1809 case 3:
1810 return vcpu->cr3;
1811 case 4:
1812 return vcpu->cr4;
1813 default:
1814 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
1815 return 0;
1816 }
1817}
1818
1819void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
1820 unsigned long *rflags)
1821{
1822 switch (cr) {
1823 case 0:
1824 set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
1825 *rflags = kvm_x86_ops->get_rflags(vcpu);
1826 break;
1827 case 2:
1828 vcpu->cr2 = val;
1829 break;
1830 case 3:
1831 set_cr3(vcpu, val);
1832 break;
1833 case 4:
1834 set_cr4(vcpu, mk_cr_64(vcpu->cr4, val));
1835 break;
1836 default:
1837 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
1838 }
1839}
1840
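/*
 * Emulate CPUID from the table supplied by userspace.  If there is no
 * exact match for the requested leaf, fall back to the highest-numbered
 * entry in the same (basic or extended) range.
 */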
1841void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
1842{
1843 int i;
1844 u32 function;
1845 struct kvm_cpuid_entry *e, *best;
1846
1847 kvm_x86_ops->cache_regs(vcpu);
1848 function = vcpu->regs[VCPU_REGS_RAX];
1849 vcpu->regs[VCPU_REGS_RAX] = 0;
1850 vcpu->regs[VCPU_REGS_RBX] = 0;
1851 vcpu->regs[VCPU_REGS_RCX] = 0;
1852 vcpu->regs[VCPU_REGS_RDX] = 0;
1853 best = NULL;
1854 for (i = 0; i < vcpu->cpuid_nent; ++i) {
1855 e = &vcpu->cpuid_entries[i];
1856 if (e->function == function) {
1857 best = e;
1858 break;
1859 }
1860 /*
1861 * Both basic or both extended?
1862 */
1863 if (((e->function ^ function) & 0x80000000) == 0)
1864 if (!best || e->function > best->function)
1865 best = e;
1866 }
1867 if (best) {
1868 vcpu->regs[VCPU_REGS_RAX] = best->eax;
1869 vcpu->regs[VCPU_REGS_RBX] = best->ebx;
1870 vcpu->regs[VCPU_REGS_RCX] = best->ecx;
1871 vcpu->regs[VCPU_REGS_RDX] = best->edx;
1872 }
1873 kvm_x86_ops->decache_regs(vcpu);
1874 kvm_x86_ops->skip_emulated_instruction(vcpu);
1875}
1876EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
1877
1878/*
1879 * Check if userspace requested an interrupt window, and that the
1880 * interrupt window is open.
1881 *
1882 * No need to exit to userspace if we already have an interrupt queued.
1883 */
1884static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
1885 struct kvm_run *kvm_run)
1886{
1887 return (!vcpu->irq_summary &&
1888 kvm_run->request_interrupt_window &&
1889 vcpu->interrupt_window_open &&
1890 (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
1891}
1892
1893static void post_kvm_run_save(struct kvm_vcpu *vcpu,
1894 struct kvm_run *kvm_run)
1895{
1896 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
1897 kvm_run->cr8 = get_cr8(vcpu);
1898 kvm_run->apic_base = kvm_get_apic_base(vcpu);
1899 if (irqchip_in_kernel(vcpu->kvm))
1900 kvm_run->ready_for_interrupt_injection = 1;
1901 else
1902 kvm_run->ready_for_interrupt_injection =
1903 (vcpu->interrupt_window_open &&
1904 vcpu->irq_summary == 0);
1905}
1906
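/*
 * Inner run loop: reload the MMU, inject pending timer and external
 * interrupts, enter the guest with interrupts disabled, and keep
 * looping until a signal, a userspace exit or a reschedule is needed.
 */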
1907static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1908{
1909 int r;
1910
1911 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
1912 pr_debug("vcpu %d received sipi with vector # %x\n",
1913 vcpu->vcpu_id, vcpu->sipi_vector);
1914 kvm_lapic_reset(vcpu);
1915 r = kvm_x86_ops->vcpu_reset(vcpu);
1916 if (r)
1917 return r;
1918 vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
1919 }
1920
1921preempted:
1922 if (vcpu->guest_debug.enabled)
1923 kvm_x86_ops->guest_debug_pre(vcpu);
1924
1925again:
1926 r = kvm_mmu_reload(vcpu);
1927 if (unlikely(r))
1928 goto out;
1929
1930 kvm_inject_pending_timer_irqs(vcpu);
1931
1932 preempt_disable();
1933
1934 kvm_x86_ops->prepare_guest_switch(vcpu);
1935 kvm_load_guest_fpu(vcpu);
1936
1937 local_irq_disable();
1938
1939 if (signal_pending(current)) {
1940 local_irq_enable();
1941 preempt_enable();
1942 r = -EINTR;
1943 kvm_run->exit_reason = KVM_EXIT_INTR;
1944 ++vcpu->stat.signal_exits;
1945 goto out;
1946 }
1947
1948 if (irqchip_in_kernel(vcpu->kvm))
1949 kvm_x86_ops->inject_pending_irq(vcpu);
1950 else if (!vcpu->mmio_read_completed)
1951 kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
1952
1953 vcpu->guest_mode = 1;
1954 kvm_guest_enter();
1955
1956 if (vcpu->requests)
1957 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
1958 kvm_x86_ops->tlb_flush(vcpu);
1959
1960 kvm_x86_ops->run(vcpu, kvm_run);
1961
1962 vcpu->guest_mode = 0;
1963 local_irq_enable();
1964
1965 ++vcpu->stat.exits;
1966
1967 /*
1968 * We must have an instruction between local_irq_enable() and
1969 * kvm_guest_exit(), so the timer interrupt isn't delayed by
1970 * the interrupt shadow. The stat.exits increment will do nicely.
1971 * But we need to prevent reordering, hence this barrier():
1972 */
1973 barrier();
1974
1975 kvm_guest_exit();
1976
1977 preempt_enable();
1978
1979 /*
1980 * Profile KVM exit RIPs:
1981 */
1982 if (unlikely(prof_on == KVM_PROFILING)) {
1983 kvm_x86_ops->cache_regs(vcpu);
1984 profile_hit(KVM_PROFILING, (void *)vcpu->rip);
1985 }
1986
1987 r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
1988
1989 if (r > 0) {
1990 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
1991 r = -EINTR;
1992 kvm_run->exit_reason = KVM_EXIT_INTR;
1993 ++vcpu->stat.request_irq_exits;
1994 goto out;
1995 }
1996 if (!need_resched())
1997 goto again;
1998 }
1999
2000out:
2001 if (r > 0) {
2002 kvm_resched(vcpu);
2003 goto preempted;
2004 }
2005
2006 post_kvm_run_save(vcpu, kvm_run);
2007
2008 return r;
2009}
2010
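/*
 * Top-level KVM_RUN handler: finish any PIO or MMIO transaction left
 * over from the previous exit to userspace, then enter __vcpu_run().
 */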
2011int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2012{
2013 int r;
2014 sigset_t sigsaved;
2015
2016 vcpu_load(vcpu);
2017
2018 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
2019 kvm_vcpu_block(vcpu);
2020 vcpu_put(vcpu);
2021 return -EAGAIN;
2022 }
2023
2024 if (vcpu->sigset_active)
2025 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2026
2027 /* re-sync apic's tpr */
2028 if (!irqchip_in_kernel(vcpu->kvm))
2029 set_cr8(vcpu, kvm_run->cr8);
2030
2031 if (vcpu->pio.cur_count) {
2032 r = complete_pio(vcpu);
2033 if (r)
2034 goto out;
2035 }
2036#ifdef CONFIG_HAS_IOMEM
2037 if (vcpu->mmio_needed) {
2038 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
2039 vcpu->mmio_read_completed = 1;
2040 vcpu->mmio_needed = 0;
2041 r = emulate_instruction(vcpu, kvm_run,
2042 vcpu->mmio_fault_cr2, 0, 1);
2043 if (r == EMULATE_DO_MMIO) {
2044 /*
2045 * Read-modify-write. Back to userspace.
2046 */
2047 r = 0;
2048 goto out;
2049 }
2050 }
2051#endif
2052 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
2053 kvm_x86_ops->cache_regs(vcpu);
2054 vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
2055 kvm_x86_ops->decache_regs(vcpu);
2056 }
2057
2058 r = __vcpu_run(vcpu, kvm_run);
2059
2060out:
2061 if (vcpu->sigset_active)
2062 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2063
2064 vcpu_put(vcpu);
2065 return r;
2066}
2067
2068int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2069{
2070 vcpu_load(vcpu);
2071
2072 kvm_x86_ops->cache_regs(vcpu);
2073
2074 regs->rax = vcpu->regs[VCPU_REGS_RAX];
2075 regs->rbx = vcpu->regs[VCPU_REGS_RBX];
2076 regs->rcx = vcpu->regs[VCPU_REGS_RCX];
2077 regs->rdx = vcpu->regs[VCPU_REGS_RDX];
2078 regs->rsi = vcpu->regs[VCPU_REGS_RSI];
2079 regs->rdi = vcpu->regs[VCPU_REGS_RDI];
2080 regs->rsp = vcpu->regs[VCPU_REGS_RSP];
2081 regs->rbp = vcpu->regs[VCPU_REGS_RBP];
2082#ifdef CONFIG_X86_64
2083 regs->r8 = vcpu->regs[VCPU_REGS_R8];
2084 regs->r9 = vcpu->regs[VCPU_REGS_R9];
2085 regs->r10 = vcpu->regs[VCPU_REGS_R10];
2086 regs->r11 = vcpu->regs[VCPU_REGS_R11];
2087 regs->r12 = vcpu->regs[VCPU_REGS_R12];
2088 regs->r13 = vcpu->regs[VCPU_REGS_R13];
2089 regs->r14 = vcpu->regs[VCPU_REGS_R14];
2090 regs->r15 = vcpu->regs[VCPU_REGS_R15];
2091#endif
2092
2093 regs->rip = vcpu->rip;
2094 regs->rflags = kvm_x86_ops->get_rflags(vcpu);
2095
2096 /*
2097 * Don't leak debug flags in case they were set for guest debugging
2098 */
2099 if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
2100 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
2101
2102 vcpu_put(vcpu);
2103
2104 return 0;
2105}
2106
2107int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2108{
2109 vcpu_load(vcpu);
2110
2111 vcpu->regs[VCPU_REGS_RAX] = regs->rax;
2112 vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
2113 vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
2114 vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
2115 vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
2116 vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
2117 vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
2118 vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
2119#ifdef CONFIG_X86_64
2120 vcpu->regs[VCPU_REGS_R8] = regs->r8;
2121 vcpu->regs[VCPU_REGS_R9] = regs->r9;
2122 vcpu->regs[VCPU_REGS_R10] = regs->r10;
2123 vcpu->regs[VCPU_REGS_R11] = regs->r11;
2124 vcpu->regs[VCPU_REGS_R12] = regs->r12;
2125 vcpu->regs[VCPU_REGS_R13] = regs->r13;
2126 vcpu->regs[VCPU_REGS_R14] = regs->r14;
2127 vcpu->regs[VCPU_REGS_R15] = regs->r15;
2128#endif
2129
2130 vcpu->rip = regs->rip;
2131 kvm_x86_ops->set_rflags(vcpu, regs->rflags);
2132
2133 kvm_x86_ops->decache_regs(vcpu);
2134
2135 vcpu_put(vcpu);
2136
2137 return 0;
2138}
2139
2140static void get_segment(struct kvm_vcpu *vcpu,
2141 struct kvm_segment *var, int seg)
2142{
2143 return kvm_x86_ops->get_segment(vcpu, var, seg);
2144}
2145
2146void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
2147{
2148 struct kvm_segment cs;
2149
2150 get_segment(vcpu, &cs, VCPU_SREG_CS);
2151 *db = cs.db;
2152 *l = cs.l;
2153}
2154EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
2155
2156int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2157 struct kvm_sregs *sregs)
2158{
2159 struct descriptor_table dt;
2160 int pending_vec;
2161
2162 vcpu_load(vcpu);
2163
2164 get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
2165 get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
2166 get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
2167 get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
2168 get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
2169 get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
2170
2171 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
2172 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
2173
2174 kvm_x86_ops->get_idt(vcpu, &dt);
2175 sregs->idt.limit = dt.limit;
2176 sregs->idt.base = dt.base;
2177 kvm_x86_ops->get_gdt(vcpu, &dt);
2178 sregs->gdt.limit = dt.limit;
2179 sregs->gdt.base = dt.base;
2180
2181 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2182 sregs->cr0 = vcpu->cr0;
2183 sregs->cr2 = vcpu->cr2;
2184 sregs->cr3 = vcpu->cr3;
2185 sregs->cr4 = vcpu->cr4;
2186 sregs->cr8 = get_cr8(vcpu);
2187 sregs->efer = vcpu->shadow_efer;
2188 sregs->apic_base = kvm_get_apic_base(vcpu);
2189
2190 if (irqchip_in_kernel(vcpu->kvm)) {
2191 memset(sregs->interrupt_bitmap, 0,
2192 sizeof sregs->interrupt_bitmap);
2193 pending_vec = kvm_x86_ops->get_irq(vcpu);
2194 if (pending_vec >= 0)
2195 set_bit(pending_vec,
2196 (unsigned long *)sregs->interrupt_bitmap);
2197 } else
2198 memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
2199 sizeof sregs->interrupt_bitmap);
2200
2201 vcpu_put(vcpu);
2202
2203 return 0;
2204}
2205
2206static void set_segment(struct kvm_vcpu *vcpu,
2207 struct kvm_segment *var, int seg)
2208{
2209	kvm_x86_ops->set_segment(vcpu, var, seg);
2210}
2211
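/*
 * Install the segment, descriptor table, control register and pending
 * interrupt state supplied by userspace; the MMU context is reset if
 * any paging-related register changed.
 */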
2212int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2213 struct kvm_sregs *sregs)
2214{
2215 int mmu_reset_needed = 0;
2216 int i, pending_vec, max_bits;
2217 struct descriptor_table dt;
2218
2219 vcpu_load(vcpu);
2220
2221 dt.limit = sregs->idt.limit;
2222 dt.base = sregs->idt.base;
2223 kvm_x86_ops->set_idt(vcpu, &dt);
2224 dt.limit = sregs->gdt.limit;
2225 dt.base = sregs->gdt.base;
2226 kvm_x86_ops->set_gdt(vcpu, &dt);
2227
2228 vcpu->cr2 = sregs->cr2;
2229 mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
2230 vcpu->cr3 = sregs->cr3;
2231
2232 set_cr8(vcpu, sregs->cr8);
2233
2234 mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
2235#ifdef CONFIG_X86_64
2236 kvm_x86_ops->set_efer(vcpu, sregs->efer);
2237#endif
2238 kvm_set_apic_base(vcpu, sregs->apic_base);
2239
2240 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2241
2242 mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
2243 vcpu->cr0 = sregs->cr0;
2244 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
2245
2246 mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
2247 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
2248 if (!is_long_mode(vcpu) && is_pae(vcpu))
2249 load_pdptrs(vcpu, vcpu->cr3);
2250
2251 if (mmu_reset_needed)
2252 kvm_mmu_reset_context(vcpu);
2253
2254 if (!irqchip_in_kernel(vcpu->kvm)) {
2255 memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
2256 sizeof vcpu->irq_pending);
2257 vcpu->irq_summary = 0;
2258 for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
2259 if (vcpu->irq_pending[i])
2260 __set_bit(i, &vcpu->irq_summary);
2261 } else {
2262 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
2263 pending_vec = find_first_bit(
2264 (const unsigned long *)sregs->interrupt_bitmap,
2265 max_bits);
2266		/* Only a pending external irq is handled here */
2267 if (pending_vec < max_bits) {
2268 kvm_x86_ops->set_irq(vcpu, pending_vec);
2269 pr_debug("Set back pending irq %d\n",
2270 pending_vec);
2271 }
2272 }
2273
2274 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
2275 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
2276 set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
2277 set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
2278 set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
2279 set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
2280
2281 set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
2282 set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
2283
2284 vcpu_put(vcpu);
2285
2286 return 0;
2287}
2288
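/* Hand the guest debug parameters to the backend. */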
2289int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
2290 struct kvm_debug_guest *dbg)
2291{
2292 int r;
2293
2294 vcpu_load(vcpu);
2295
2296 r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
2297
2298 vcpu_put(vcpu);
2299
2300 return r;
2301}
2302
2303/*
2304 * fxsave fpu state. Taken from x86_64/processor.h. To be killed when
2305 * we have asm/x86/processor.h
2306 */
2307struct fxsave {
2308 u16 cwd;
2309 u16 swd;
2310 u16 twd;
2311 u16 fop;
2312 u64 rip;
2313 u64 rdp;
2314 u32 mxcsr;
2315 u32 mxcsr_mask;
2316 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
2317#ifdef CONFIG_X86_64
2318 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
2319#else
2320 u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
2321#endif
2322};
2323
2324/*
2325 * Translate a guest virtual address to a guest physical address.
2326 */
2327int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2328 struct kvm_translation *tr)
2329{
2330 unsigned long vaddr = tr->linear_address;
2331 gpa_t gpa;
2332
2333 vcpu_load(vcpu);
2334 mutex_lock(&vcpu->kvm->lock);
2335 gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
2336 tr->physical_address = gpa;
2337 tr->valid = gpa != UNMAPPED_GVA;
2338 tr->writeable = 1;
2339 tr->usermode = 0;
2340 mutex_unlock(&vcpu->kvm->lock);
2341 vcpu_put(vcpu);
2342
2343 return 0;
2344}
2345
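/* Export the guest's fxsave image to userspace as a struct kvm_fpu. */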
2346int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2347{
2348 struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
2349
2350 vcpu_load(vcpu);
2351
2352 memcpy(fpu->fpr, fxsave->st_space, 128);
2353 fpu->fcw = fxsave->cwd;
2354 fpu->fsw = fxsave->swd;
2355 fpu->ftwx = fxsave->twd;
2356 fpu->last_opcode = fxsave->fop;
2357 fpu->last_ip = fxsave->rip;
2358 fpu->last_dp = fxsave->rdp;
2359 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
2360
2361 vcpu_put(vcpu);
2362
2363 return 0;
2364}
2365
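/* Load a struct kvm_fpu from userspace into the guest's fxsave image. */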
2366int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2367{
2368 struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
2369
2370 vcpu_load(vcpu);
2371
2372 memcpy(fxsave->st_space, fpu->fpr, 128);
2373 fxsave->cwd = fpu->fcw;
2374 fxsave->swd = fpu->fsw;
2375 fxsave->twd = fpu->ftwx;
2376 fxsave->fop = fpu->last_opcode;
2377 fxsave->rip = fpu->last_ip;
2378 fxsave->rdp = fpu->last_dp;
2379 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
2380
2381 vcpu_put(vcpu);
2382
2383 return 0;
2384}
2385
2386void fx_init(struct kvm_vcpu *vcpu)
2387{
2388 unsigned after_mxcsr_mask;
2389
2390 /* Initialize guest FPU by resetting ours and saving into guest's */
2391 preempt_disable();
2392 fx_save(&vcpu->host_fx_image);
2393 fpu_init();
2394 fx_save(&vcpu->guest_fx_image);
2395 fx_restore(&vcpu->host_fx_image);
2396 preempt_enable();
2397
2398 vcpu->cr0 |= X86_CR0_ET;
2399 after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
2400 vcpu->guest_fx_image.mxcsr = 0x1f80;
2401 memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask,
2402 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
2403}
2404EXPORT_SYMBOL_GPL(fx_init);
2405
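/*
 * Switch the hardware FPU from the host image to the guest image.
 * Does nothing if the guest FPU is inactive or already loaded.
 */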
2406void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
2407{
2408 if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
2409 return;
2410
2411 vcpu->guest_fpu_loaded = 1;
2412 fx_save(&vcpu->host_fx_image);
2413 fx_restore(&vcpu->guest_fx_image);
2414}
2415EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
2416
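/*
 * Switch the hardware FPU back from the guest image to the host image
 * and account the reload.
 */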
2417void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
2418{
2419 if (!vcpu->guest_fpu_loaded)
2420 return;
2421
2422 vcpu->guest_fpu_loaded = 0;
2423 fx_save(&vcpu->guest_fx_image);
2424 fx_restore(&vcpu->host_fx_image);
2425	++vcpu->stat.fpu_reload;
2426}
2427EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
2428
2429void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
2430{
2431 kvm_x86_ops->vcpu_free(vcpu);
2432}
2433
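/*
 * Create a vcpu through the backend, reset it and set up its MMU.
 */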
2434struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2435 unsigned int id)
2436{
2437 int r;
2438 struct kvm_vcpu *vcpu = kvm_x86_ops->vcpu_create(kvm, id);
2439
2440 if (IS_ERR(vcpu)) {
2441 r = -ENOMEM;
2442 goto fail;
2443 }
2444
2445	/* We do fxsave: the fx_image must be 16-byte aligned. */
2446 BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);
2447
2448 vcpu_load(vcpu);
2449 r = kvm_arch_vcpu_reset(vcpu);
2450 if (r == 0)
2451 r = kvm_mmu_setup(vcpu);
2452 vcpu_put(vcpu);
2453 if (r < 0)
2454 goto free_vcpu;
2455
2456 return vcpu;
2457free_vcpu:
2458 kvm_x86_ops->vcpu_free(vcpu);
2459fail:
2460 return ERR_PTR(r);
2461}
2462
2463void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2464{
2465 vcpu_load(vcpu);
2466 kvm_mmu_unload(vcpu);
2467 vcpu_put(vcpu);
2468
2469 kvm_x86_ops->vcpu_free(vcpu);
2470}
2471
2472int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
2473{
2474 return kvm_x86_ops->vcpu_reset(vcpu);
2475}
2476
2477void kvm_arch_hardware_enable(void *garbage)
2478{
2479 kvm_x86_ops->hardware_enable(garbage);
2480}
2481
2482void kvm_arch_hardware_disable(void *garbage)
2483{
2484 kvm_x86_ops->hardware_disable(garbage);
2485}
2486
2487int kvm_arch_hardware_setup(void)
2488{
2489 return kvm_x86_ops->hardware_setup();
2490}
2491
2492void kvm_arch_hardware_unsetup(void)
2493{
2494 kvm_x86_ops->hardware_unsetup();
2495}
2496
2497void kvm_arch_check_processor_compat(void *rtn)
2498{
2499 kvm_x86_ops->check_processor_compatibility(rtn);
2500}
2501
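/*
 * Common x86 vcpu setup: MP state, the pio scratch page, the MMU and,
 * with an in-kernel irqchip, the local APIC.
 */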
2502int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2503{
2504 struct page *page;
2505 struct kvm *kvm;
2506 int r;
2507
2508 BUG_ON(vcpu->kvm == NULL);
2509 kvm = vcpu->kvm;
2510
2511 vcpu->mmu.root_hpa = INVALID_PAGE;
2512 if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
2513 vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
2514 else
2515 vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
2516
2517 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
2518 if (!page) {
2519 r = -ENOMEM;
2520 goto fail;
2521 }
2522 vcpu->pio_data = page_address(page);
2523
2524 r = kvm_mmu_create(vcpu);
2525 if (r < 0)
2526 goto fail_free_pio_data;
2527
2528 if (irqchip_in_kernel(kvm)) {
2529 r = kvm_create_lapic(vcpu);
2530 if (r < 0)
2531 goto fail_mmu_destroy;
2532 }
2533
2534 return 0;
2535
2536fail_mmu_destroy:
2537 kvm_mmu_destroy(vcpu);
2538fail_free_pio_data:
2539 free_page((unsigned long)vcpu->pio_data);
2540fail:
2541 return r;
2542}
2543
2544void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
2545{
2546 kvm_free_lapic(vcpu);
2547 kvm_mmu_destroy(vcpu);
2548 free_page((unsigned long)vcpu->pio_data);
2549}
2550
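/* Allocate a new VM and initialize its list of active MMU pages. */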
2551struct kvm *kvm_arch_create_vm(void)
2552{
2553 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
2554
2555 if (!kvm)
2556 return ERR_PTR(-ENOMEM);
2557
2558 INIT_LIST_HEAD(&kvm->active_mmu_pages);
2559
2560 return kvm;
2561}
2562
2563static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
2564{
2565 vcpu_load(vcpu);
2566 kvm_mmu_unload(vcpu);
2567 vcpu_put(vcpu);
2568}
2569
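/* Unload every vcpu's MMU, then free all vcpus. */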
2570static void kvm_free_vcpus(struct kvm *kvm)
2571{
2572 unsigned int i;
2573
2574 /*
2575 * Unpin any mmu pages first.
2576 */
2577 for (i = 0; i < KVM_MAX_VCPUS; ++i)
2578 if (kvm->vcpus[i])
2579 kvm_unload_vcpu_mmu(kvm->vcpus[i]);
2580 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
2581 if (kvm->vcpus[i]) {
2582 kvm_arch_vcpu_free(kvm->vcpus[i]);
2583 kvm->vcpus[i] = NULL;
2584 }
2585 }
2586
2587}
2588
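/*
 * Free the in-kernel irqchip state, all vcpus, guest physical memory
 * and finally the VM itself.
 */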
2589void kvm_arch_destroy_vm(struct kvm *kvm)
2590{
2591 kfree(kvm->vpic);
2592 kfree(kvm->vioapic);
2593 kvm_free_vcpus(kvm);
2594 kvm_free_physmem(kvm);
2595 kfree(kvm);
2596}