2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
16 #include <linux/compiler.h>
17 #include <linux/err.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/module.h>
24 #include <linux/slab.h>
25 #include <linux/timer.h>
26 #include <asm/asm-offsets.h>
27 #include <asm/lowcore.h>
28 #include <asm/pgtable.h>
30 #include <asm/switch_to.h>
31 #include <asm/facility.h>
36 #define CREATE_TRACE_POINTS
38 #include "trace-s390.h"
40 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
42 struct kvm_stats_debugfs_item debugfs_entries[] = {
43 { "userspace_handled", VCPU_STAT(exit_userspace) },
44 { "exit_null", VCPU_STAT(exit_null) },
45 { "exit_validity", VCPU_STAT(exit_validity) },
46 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
47 { "exit_external_request", VCPU_STAT(exit_external_request) },
48 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
49 { "exit_instruction", VCPU_STAT(exit_instruction) },
50 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
51 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
52 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
53 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
54 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
55 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
56 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
57 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
58 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
59 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
60 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
61 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
62 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
63 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
64 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
65 { "instruction_spx", VCPU_STAT(instruction_spx) },
66 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
67 { "instruction_stap", VCPU_STAT(instruction_stap) },
68 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
69 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
70 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
71 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
72 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
73 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
74 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
75 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
76 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
77 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
78 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
79 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
80 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
81 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
82 { "diagnose_10", VCPU_STAT(diagnose_10) },
83 { "diagnose_44", VCPU_STAT(diagnose_44) },
84 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
88 unsigned long *vfacilities;
89 static struct gmap_notifier gmap_notifier;
91 /* test availability of vfacility */
92 static inline int test_vfacility(unsigned long nr)
94 return __test_facility(nr, (void *) vfacilities);
97 /* Section: not file related */
98 int kvm_arch_hardware_enable(void *garbage)
100 /* every s390 is virtualization enabled ;-) */
104 void kvm_arch_hardware_disable(void *garbage)
108 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
110 int kvm_arch_hardware_setup(void)
112 gmap_notifier.notifier_call = kvm_gmap_notifier;
113 gmap_register_ipte_notifier(&gmap_notifier);
117 void kvm_arch_hardware_unsetup(void)
119 gmap_unregister_ipte_notifier(&gmap_notifier);
122 void kvm_arch_check_processor_compat(void *rtn)
126 int kvm_arch_init(void *opaque)
131 void kvm_arch_exit(void)
135 /* Section: device related */
136 long kvm_arch_dev_ioctl(struct file *filp,
137 unsigned int ioctl, unsigned long arg)
139 if (ioctl == KVM_S390_ENABLE_SIE)
140 return s390_enable_sie();
144 int kvm_dev_ioctl_check_extension(long ext)
149 case KVM_CAP_S390_PSW:
150 case KVM_CAP_S390_GMAP:
151 case KVM_CAP_SYNC_MMU:
152 #ifdef CONFIG_KVM_S390_UCONTROL
153 case KVM_CAP_S390_UCONTROL:
155 case KVM_CAP_ASYNC_PF:
156 case KVM_CAP_SYNC_REGS:
157 case KVM_CAP_ONE_REG:
158 case KVM_CAP_ENABLE_CAP:
159 case KVM_CAP_S390_CSS_SUPPORT:
160 case KVM_CAP_IOEVENTFD:
161 case KVM_CAP_DEVICE_CTRL:
162 case KVM_CAP_ENABLE_CAP_VM:
165 case KVM_CAP_NR_VCPUS:
166 case KVM_CAP_MAX_VCPUS:
169 case KVM_CAP_NR_MEMSLOTS:
170 r = KVM_USER_MEM_SLOTS;
172 case KVM_CAP_S390_COW:
173 r = MACHINE_HAS_ESOP;
/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 * Not implemented on s390 at this point; reports success with no data.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	return 0;
}
191 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
199 case KVM_CAP_S390_IRQCHIP:
200 kvm->arch.use_irqchip = 1;
210 long kvm_arch_vm_ioctl(struct file *filp,
211 unsigned int ioctl, unsigned long arg)
213 struct kvm *kvm = filp->private_data;
214 void __user *argp = (void __user *)arg;
218 case KVM_S390_INTERRUPT: {
219 struct kvm_s390_interrupt s390int;
222 if (copy_from_user(&s390int, argp, sizeof(s390int)))
224 r = kvm_s390_inject_vm(kvm, &s390int);
227 case KVM_ENABLE_CAP: {
228 struct kvm_enable_cap cap;
230 if (copy_from_user(&cap, argp, sizeof(cap)))
232 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
235 case KVM_CREATE_IRQCHIP: {
236 struct kvm_irq_routing_entry routing;
239 if (kvm->arch.use_irqchip) {
240 /* Set up dummy routing. */
241 memset(&routing, 0, sizeof(routing));
242 kvm_set_irq_routing(kvm, &routing, 0, 0);
254 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
258 static unsigned long sca_offset;
261 #ifdef CONFIG_KVM_S390_UCONTROL
262 if (type & ~KVM_VM_S390_UCONTROL)
264 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
271 rc = s390_enable_sie();
277 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
280 spin_lock(&kvm_lock);
281 sca_offset = (sca_offset + 16) & 0x7f0;
282 kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
283 spin_unlock(&kvm_lock);
285 sprintf(debug_name, "kvm-%u", current->pid);
287 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
291 spin_lock_init(&kvm->arch.float_int.lock);
292 INIT_LIST_HEAD(&kvm->arch.float_int.list);
294 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
295 VM_EVENT(kvm, 3, "%s", "vm created");
297 if (type & KVM_VM_S390_UCONTROL) {
298 kvm->arch.gmap = NULL;
300 kvm->arch.gmap = gmap_alloc(current->mm);
303 kvm->arch.gmap->private = kvm;
304 kvm->arch.gmap->pfault_enabled = 0;
307 kvm->arch.css_support = 0;
308 kvm->arch.use_irqchip = 0;
312 debug_unregister(kvm->arch.dbf);
314 free_page((unsigned long)(kvm->arch.sca));
319 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
321 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
322 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
323 kvm_clear_async_pf_completion_queue(vcpu);
324 if (!kvm_is_ucontrol(vcpu->kvm)) {
325 clear_bit(63 - vcpu->vcpu_id,
326 (unsigned long *) &vcpu->kvm->arch.sca->mcn);
327 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
328 (__u64) vcpu->arch.sie_block)
329 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
333 if (kvm_is_ucontrol(vcpu->kvm))
334 gmap_free(vcpu->arch.gmap);
336 free_page((unsigned long)(vcpu->arch.sie_block));
337 kvm_vcpu_uninit(vcpu);
338 kmem_cache_free(kvm_vcpu_cache, vcpu);
341 static void kvm_free_vcpus(struct kvm *kvm)
344 struct kvm_vcpu *vcpu;
346 kvm_for_each_vcpu(i, vcpu, kvm)
347 kvm_arch_vcpu_destroy(vcpu);
349 mutex_lock(&kvm->lock);
350 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
351 kvm->vcpus[i] = NULL;
353 atomic_set(&kvm->online_vcpus, 0);
354 mutex_unlock(&kvm->lock);
void kvm_arch_sync_events(struct kvm *kvm)
{
}
361 void kvm_arch_destroy_vm(struct kvm *kvm)
364 free_page((unsigned long)(kvm->arch.sca));
365 debug_unregister(kvm->arch.dbf);
366 if (!kvm_is_ucontrol(kvm))
367 gmap_free(kvm->arch.gmap);
368 kvm_s390_destroy_adapters(kvm);
371 /* Section: vcpu related */
372 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
374 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
375 kvm_clear_async_pf_completion_queue(vcpu);
376 if (kvm_is_ucontrol(vcpu->kvm)) {
377 vcpu->arch.gmap = gmap_alloc(current->mm);
378 if (!vcpu->arch.gmap)
380 vcpu->arch.gmap->private = vcpu->kvm;
384 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
385 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* Nothing todo */
}
397 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
399 save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
400 save_fp_regs(vcpu->arch.host_fpregs.fprs);
401 save_access_regs(vcpu->arch.host_acrs);
402 restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
403 restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
404 restore_access_regs(vcpu->run->s.regs.acrs);
405 gmap_enable(vcpu->arch.gmap);
406 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
409 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
411 atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
412 gmap_disable(vcpu->arch.gmap);
413 save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
414 save_fp_regs(vcpu->arch.guest_fpregs.fprs);
415 save_access_regs(vcpu->run->s.regs.acrs);
416 restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
417 restore_fp_regs(vcpu->arch.host_fpregs.fprs);
418 restore_access_regs(vcpu->arch.host_acrs);
421 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
423 /* this equals initial cpu reset in pop, but we don't switch to ESA */
424 vcpu->arch.sie_block->gpsw.mask = 0UL;
425 vcpu->arch.sie_block->gpsw.addr = 0UL;
426 kvm_s390_set_prefix(vcpu, 0);
427 vcpu->arch.sie_block->cputm = 0UL;
428 vcpu->arch.sie_block->ckc = 0UL;
429 vcpu->arch.sie_block->todpr = 0;
430 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
431 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
432 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
433 vcpu->arch.guest_fpregs.fpc = 0;
434 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
435 vcpu->arch.sie_block->gbea = 1;
436 vcpu->arch.sie_block->pp = 0;
437 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
438 kvm_clear_async_pf_completion_queue(vcpu);
439 atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
440 kvm_s390_clear_local_irqs(vcpu);
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	return 0;
}
448 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
450 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
454 vcpu->arch.sie_block->ecb = 6;
455 if (test_vfacility(50) && test_vfacility(73))
456 vcpu->arch.sie_block->ecb |= 0x10;
458 vcpu->arch.sie_block->ecb2 = 8;
459 vcpu->arch.sie_block->eca = 0xC1002001U;
460 vcpu->arch.sie_block->fac = (int) (long) vfacilities;
461 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
462 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
463 (unsigned long) vcpu);
464 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
465 get_cpu_id(&vcpu->arch.cpu_id);
466 vcpu->arch.cpu_id.version = 0xff;
470 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
473 struct kvm_vcpu *vcpu;
474 struct sie_page *sie_page;
477 if (id >= KVM_MAX_VCPUS)
482 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
486 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
490 vcpu->arch.sie_block = &sie_page->sie_block;
491 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
493 vcpu->arch.sie_block->icpua = id;
494 if (!kvm_is_ucontrol(kvm)) {
495 if (!kvm->arch.sca) {
499 if (!kvm->arch.sca->cpu[id].sda)
500 kvm->arch.sca->cpu[id].sda =
501 (__u64) vcpu->arch.sie_block;
502 vcpu->arch.sie_block->scaoh =
503 (__u32)(((__u64)kvm->arch.sca) >> 32);
504 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
505 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
508 spin_lock_init(&vcpu->arch.local_int.lock);
509 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
510 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
511 vcpu->arch.local_int.wq = &vcpu->wq;
512 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
514 rc = kvm_vcpu_init(vcpu, kvm, id);
516 goto out_free_sie_block;
517 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
518 vcpu->arch.sie_block);
519 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
523 free_page((unsigned long)(vcpu->arch.sie_block));
525 kmem_cache_free(kvm_vcpu_cache, vcpu);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_cpu_has_interrupt(vcpu);
}
535 void s390_vcpu_block(struct kvm_vcpu *vcpu)
537 atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
540 void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
542 atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
546 * Kick a guest cpu out of SIE and wait until SIE is not running.
547 * If the CPU is not running (e.g. waiting as idle) the function will
548 * return immediately. */
549 void exit_sie(struct kvm_vcpu *vcpu)
551 atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
552 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
556 /* Kick a guest cpu out of SIE and prevent SIE-reentry */
557 void exit_sie_sync(struct kvm_vcpu *vcpu)
559 s390_vcpu_block(vcpu);
563 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
566 struct kvm *kvm = gmap->private;
567 struct kvm_vcpu *vcpu;
569 kvm_for_each_vcpu(i, vcpu, kvm) {
570 /* match against both prefix pages */
571 if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
572 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
573 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
586 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
587 struct kvm_one_reg *reg)
592 case KVM_REG_S390_TODPR:
593 r = put_user(vcpu->arch.sie_block->todpr,
594 (u32 __user *)reg->addr);
596 case KVM_REG_S390_EPOCHDIFF:
597 r = put_user(vcpu->arch.sie_block->epoch,
598 (u64 __user *)reg->addr);
600 case KVM_REG_S390_CPU_TIMER:
601 r = put_user(vcpu->arch.sie_block->cputm,
602 (u64 __user *)reg->addr);
604 case KVM_REG_S390_CLOCK_COMP:
605 r = put_user(vcpu->arch.sie_block->ckc,
606 (u64 __user *)reg->addr);
608 case KVM_REG_S390_PFTOKEN:
609 r = put_user(vcpu->arch.pfault_token,
610 (u64 __user *)reg->addr);
612 case KVM_REG_S390_PFCOMPARE:
613 r = put_user(vcpu->arch.pfault_compare,
614 (u64 __user *)reg->addr);
616 case KVM_REG_S390_PFSELECT:
617 r = put_user(vcpu->arch.pfault_select,
618 (u64 __user *)reg->addr);
620 case KVM_REG_S390_PP:
621 r = put_user(vcpu->arch.sie_block->pp,
622 (u64 __user *)reg->addr);
624 case KVM_REG_S390_GBEA:
625 r = put_user(vcpu->arch.sie_block->gbea,
626 (u64 __user *)reg->addr);
635 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
636 struct kvm_one_reg *reg)
641 case KVM_REG_S390_TODPR:
642 r = get_user(vcpu->arch.sie_block->todpr,
643 (u32 __user *)reg->addr);
645 case KVM_REG_S390_EPOCHDIFF:
646 r = get_user(vcpu->arch.sie_block->epoch,
647 (u64 __user *)reg->addr);
649 case KVM_REG_S390_CPU_TIMER:
650 r = get_user(vcpu->arch.sie_block->cputm,
651 (u64 __user *)reg->addr);
653 case KVM_REG_S390_CLOCK_COMP:
654 r = get_user(vcpu->arch.sie_block->ckc,
655 (u64 __user *)reg->addr);
657 case KVM_REG_S390_PFTOKEN:
658 r = get_user(vcpu->arch.pfault_token,
659 (u64 __user *)reg->addr);
661 case KVM_REG_S390_PFCOMPARE:
662 r = get_user(vcpu->arch.pfault_compare,
663 (u64 __user *)reg->addr);
665 case KVM_REG_S390_PFSELECT:
666 r = get_user(vcpu->arch.pfault_select,
667 (u64 __user *)reg->addr);
669 case KVM_REG_S390_PP:
670 r = get_user(vcpu->arch.sie_block->pp,
671 (u64 __user *)reg->addr);
673 case KVM_REG_S390_GBEA:
674 r = get_user(vcpu->arch.sie_block->gbea,
675 (u64 __user *)reg->addr);
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
690 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
692 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs));
696 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
698 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
702 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
703 struct kvm_sregs *sregs)
705 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
706 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
707 restore_access_regs(vcpu->run->s.regs.acrs);
711 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
712 struct kvm_sregs *sregs)
714 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
715 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
719 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
721 if (test_fp_ctl(fpu->fpc))
723 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
724 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
725 restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
726 restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
730 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
732 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
733 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
737 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
741 if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
744 vcpu->run->psw_mask = psw.mask;
745 vcpu->run->psw_addr = psw.addr;
750 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
751 struct kvm_translation *tr)
753 return -EINVAL; /* not implemented yet */
756 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
757 struct kvm_guest_debug *dbg)
759 return -EINVAL; /* not implemented yet */
762 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
763 struct kvm_mp_state *mp_state)
765 return -EINVAL; /* not implemented yet */
768 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
769 struct kvm_mp_state *mp_state)
771 return -EINVAL; /* not implemented yet */
774 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
777 * We use MMU_RELOAD just to re-arm the ipte notifier for the
778 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
779 * This ensures that the ipte instruction for this request has
780 * already finished. We might race against a second unmapper that
781 * wants to set the blocking bit. Lets just retry the request loop.
783 while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
785 rc = gmap_ipte_notify(vcpu->arch.gmap,
786 vcpu->arch.sie_block->prefix,
790 s390_vcpu_unblock(vcpu);
795 static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
798 hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
799 struct mm_struct *mm = current->mm;
800 down_read(&mm->mmap_sem);
801 rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
802 up_read(&mm->mmap_sem);
806 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
809 struct kvm_s390_interrupt inti;
813 inti.type = KVM_S390_INT_PFAULT_INIT;
814 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
816 inti.type = KVM_S390_INT_PFAULT_DONE;
817 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
821 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
822 struct kvm_async_pf *work)
824 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
825 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
828 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
829 struct kvm_async_pf *work)
831 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
832 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
835 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
836 struct kvm_async_pf *work)
838 /* s390 will always inject the page directly */
841 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
844 * s390 will always inject the page directly,
845 * but we still want check_async_completion to cleanup
850 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
853 struct kvm_arch_async_pf arch;
856 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
858 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
859 vcpu->arch.pfault_compare)
861 if (psw_extint_disabled(vcpu))
863 if (kvm_cpu_has_interrupt(vcpu))
865 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
867 if (!vcpu->arch.gmap->pfault_enabled)
870 hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
871 if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
874 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
878 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
883 * On s390 notifications for arriving pages will be delivered directly
884 * to the guest but the house keeping for completed pfaults is
885 * handled outside the worker.
887 kvm_check_async_pf_completion(vcpu);
889 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
894 if (test_thread_flag(TIF_MCCK_PENDING))
897 if (!kvm_is_ucontrol(vcpu->kvm))
898 kvm_s390_deliver_pending_interrupts(vcpu);
900 rc = kvm_s390_handle_requests(vcpu);
904 vcpu->arch.sie_block->icptcode = 0;
905 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
906 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
907 trace_kvm_s390_sie_enter(vcpu, cpuflags);
912 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
916 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
917 vcpu->arch.sie_block->icptcode);
918 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
920 if (exit_reason >= 0) {
922 } else if (kvm_is_ucontrol(vcpu->kvm)) {
923 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
924 vcpu->run->s390_ucontrol.trans_exc_code =
925 current->thread.gmap_addr;
926 vcpu->run->s390_ucontrol.pgm_code = 0x10;
929 } else if (current->thread.gmap_pfault) {
930 trace_kvm_s390_major_guest_pfault(vcpu);
931 current->thread.gmap_pfault = 0;
932 if (kvm_arch_setup_async_pf(vcpu) ||
933 (kvm_arch_fault_in_sync(vcpu) >= 0))
938 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
939 trace_kvm_s390_sie_fault(vcpu);
940 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
943 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
946 if (kvm_is_ucontrol(vcpu->kvm))
947 /* Don't exit for host interrupts. */
948 rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
950 rc = kvm_handle_sie_intercept(vcpu);
956 static int __vcpu_run(struct kvm_vcpu *vcpu)
961 * We try to hold kvm->srcu during most of vcpu_run (except when run-
962 * ning the guest), so that memslots (and other stuff) are protected
964 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
967 rc = vcpu_pre_run(vcpu);
971 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
973 * As PF_VCPU will be used in fault handler, between
974 * guest_enter and guest_exit should be no uaccess.
979 exit_reason = sie64a(vcpu->arch.sie_block,
980 vcpu->run->s.regs.gprs);
982 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
984 rc = vcpu_post_run(vcpu, exit_reason);
985 } while (!signal_pending(current) && !rc);
987 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
991 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
996 if (vcpu->sigset_active)
997 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
999 atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
1001 switch (kvm_run->exit_reason) {
1002 case KVM_EXIT_S390_SIEIC:
1003 case KVM_EXIT_UNKNOWN:
1005 case KVM_EXIT_S390_RESET:
1006 case KVM_EXIT_S390_UCONTROL:
1007 case KVM_EXIT_S390_TSCH:
1013 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
1014 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
1015 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
1016 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
1017 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1019 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
1020 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
1021 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
1022 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1026 rc = __vcpu_run(vcpu);
1028 if (signal_pending(current) && !rc) {
1029 kvm_run->exit_reason = KVM_EXIT_INTR;
1033 if (rc == -EOPNOTSUPP) {
1034 /* intercept cannot be handled in-kernel, prepare kvm-run */
1035 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
1036 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
1037 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
1038 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
1042 if (rc == -EREMOTE) {
1043 /* intercept was handled, but userspace support is needed
1044 * kvm_run has been prepared by the handler */
1048 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
1049 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
1050 kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
1051 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
1053 if (vcpu->sigset_active)
1054 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1056 vcpu->stat.exit_userspace++;
1060 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
1061 unsigned long n, int prefix)
1064 return copy_to_guest(vcpu, guestdest, from, n);
1066 return copy_to_guest_absolute(vcpu, guestdest, from, n);
1070 * store status at address
1071 * we use have two special cases:
1072 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
1073 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
1075 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
1077 unsigned char archmode = 1;
1081 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
1082 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
1084 addr = SAVE_AREA_BASE;
1086 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
1087 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
1089 addr = SAVE_AREA_BASE;
1094 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
1095 vcpu->arch.guest_fpregs.fprs, 128, prefix))
1098 if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
1099 vcpu->run->s.regs.gprs, 128, prefix))
1102 if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
1103 &vcpu->arch.sie_block->gpsw, 16, prefix))
1106 if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
1107 &vcpu->arch.sie_block->prefix, 4, prefix))
1110 if (__guestcopy(vcpu,
1111 addr + offsetof(struct save_area, fp_ctrl_reg),
1112 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
1115 if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
1116 &vcpu->arch.sie_block->todpr, 4, prefix))
1119 if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
1120 &vcpu->arch.sie_block->cputm, 8, prefix))
1123 clkcomp = vcpu->arch.sie_block->ckc >> 8;
1124 if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
1125 &clkcomp, 8, prefix))
1128 if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
1129 &vcpu->run->s.regs.acrs, 64, prefix))
1132 if (__guestcopy(vcpu,
1133 addr + offsetof(struct save_area, ctrl_regs),
1134 &vcpu->arch.sie_block->gcr, 128, prefix))
1139 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
1142 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
1143 * copying in vcpu load/put. Lets update our copies before we save
1144 * it into the save area
1146 save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1147 save_fp_regs(vcpu->arch.guest_fpregs.fprs);
1148 save_access_regs(vcpu->run->s.regs.acrs);
1150 return kvm_s390_store_status_unloaded(vcpu, addr);
1153 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
1154 struct kvm_enable_cap *cap)
1162 case KVM_CAP_S390_CSS_SUPPORT:
1163 if (!vcpu->kvm->arch.css_support) {
1164 vcpu->kvm->arch.css_support = 1;
1165 trace_kvm_s390_enable_css(vcpu->kvm);
1176 long kvm_arch_vcpu_ioctl(struct file *filp,
1177 unsigned int ioctl, unsigned long arg)
1179 struct kvm_vcpu *vcpu = filp->private_data;
1180 void __user *argp = (void __user *)arg;
1185 case KVM_S390_INTERRUPT: {
1186 struct kvm_s390_interrupt s390int;
1189 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1191 r = kvm_s390_inject_vcpu(vcpu, &s390int);
1194 case KVM_S390_STORE_STATUS:
1195 idx = srcu_read_lock(&vcpu->kvm->srcu);
1196 r = kvm_s390_vcpu_store_status(vcpu, arg);
1197 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1199 case KVM_S390_SET_INITIAL_PSW: {
1203 if (copy_from_user(&psw, argp, sizeof(psw)))
1205 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
1208 case KVM_S390_INITIAL_RESET:
1209 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
1211 case KVM_SET_ONE_REG:
1212 case KVM_GET_ONE_REG: {
1213 struct kvm_one_reg reg;
1215 if (copy_from_user(®, argp, sizeof(reg)))
1217 if (ioctl == KVM_SET_ONE_REG)
1218 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®);
1220 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®);
1223 #ifdef CONFIG_KVM_S390_UCONTROL
1224 case KVM_S390_UCAS_MAP: {
1225 struct kvm_s390_ucas_mapping ucasmap;
1227 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
1232 if (!kvm_is_ucontrol(vcpu->kvm)) {
1237 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
1238 ucasmap.vcpu_addr, ucasmap.length);
1241 case KVM_S390_UCAS_UNMAP: {
1242 struct kvm_s390_ucas_mapping ucasmap;
1244 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
1249 if (!kvm_is_ucontrol(vcpu->kvm)) {
1254 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
1259 case KVM_S390_VCPU_FAULT: {
1260 r = gmap_fault(arg, vcpu->arch.gmap);
1261 if (!IS_ERR_VALUE(r))
1265 case KVM_ENABLE_CAP:
1267 struct kvm_enable_cap cap;
1269 if (copy_from_user(&cap, argp, sizeof(cap)))
1271 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
1280 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1282 #ifdef CONFIG_KVM_S390_UCONTROL
1283 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
1284 && (kvm_is_ucontrol(vcpu->kvm))) {
1285 vmf->page = virt_to_page(vcpu->arch.sie_block);
1286 get_page(vmf->page);
1290 return VM_FAULT_SIGBUS;
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm)
{
}
1308 /* Section: memory related */
1309 int kvm_arch_prepare_memory_region(struct kvm *kvm,
1310 struct kvm_memory_slot *memslot,
1311 struct kvm_userspace_memory_region *mem,
1312 enum kvm_mr_change change)
1314 /* A few sanity checks. We can have memory slots which have to be
1315 located/ended at a segment boundary (1MB). The memory in userland is
1316 ok to be fragmented into various different vmas. It is okay to mmap()
1317 and munmap() stuff in this slot after doing this call at any time */
1319 if (mem->userspace_addr & 0xffffful)
1322 if (mem->memory_size & 0xffffful)
1328 void kvm_arch_commit_memory_region(struct kvm *kvm,
1329 struct kvm_userspace_memory_region *mem,
1330 const struct kvm_memory_slot *old,
1331 enum kvm_mr_change change)
1335 /* If the basics of the memslot do not change, we do not want
1336 * to update the gmap. Every update causes several unnecessary
1337 * segment translation exceptions. This is usually handled just
1338 * fine by the normal fault handler + gmap, but it will also
1339 * cause faults on the prefix page of running guest CPUs.
1341 if (old->userspace_addr == mem->userspace_addr &&
1342 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
1343 old->npages * PAGE_SIZE == mem->memory_size)
1346 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
1347 mem->guest_phys_addr, mem->memory_size);
1349 printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
}
1362 static int __init kvm_s390_init(void)
1365 ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1370 * guests can ask for up to 255+1 double words, we need a full page
1371 * to hold the maximum amount of facilities. On the other hand, we
1372 * only set facilities that are known to work in KVM.
1374 vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
1379 memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
1380 vfacilities[0] &= 0xff82fff3f4fc2000UL;
1381 vfacilities[1] &= 0x005c000000000000UL;
1385 static void __exit kvm_s390_exit(void)
1387 free_page((unsigned long) vfacilities);
1391 module_init(kvm_s390_init);
1392 module_exit(kvm_s390_exit);
1395 * Enable autoloading of the kvm module.
1396 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
1397 * since x86 takes a different approach.
1399 #include <linux/miscdevice.h>
1400 MODULE_ALIAS_MISCDEV(KVM_MINOR);
1401 MODULE_ALIAS("devname:kvm");