/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
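
/*
 * Per-VCPU event counters, exported by kvm common code as files in the
 * kvm debugfs directory. VCPU_STAT() records the offset of the counter
 * inside struct kvm_vcpu.
 */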
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ NULL }
};

unsigned long *vfacilities;
static struct gmap_notifier gmap_notifier;

/* test availability of vfacility */
static inline int test_vfacility(unsigned long nr)
{
	return __test_facility(nr, (void *) vfacilities);
}

/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

void kvm_arch_hardware_disable(void *garbage)
{
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
	return 0;
}

void kvm_arch_exit(void)
{
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_dev_ioctl_check_extension(long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	default:
		r = 0;
	}
	return r;
}

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	return 0;
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			kvm_set_irq_routing(kvm, &routing, 0, 0);
			r = 0;
		}
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;
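
	/*
	 * Each VM has its own system control area (SCA) with one entry per
	 * VCPU. The allocation below is staggered within the page in
	 * 16-byte steps, so the SCAs of different VMs do not all start on
	 * the same cache lines.
	 */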
	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset = (sca_offset + 16) & 0x7f0;
	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_nodbf;

	spin_lock_init(&kvm->arch.float_int.lock);
	INIT_LIST_HEAD(&kvm->arch.float_int.list);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm);
		if (!kvm->arch.gmap)
			goto out_nogmap;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;

	return 0;
out_nogmap:
	debug_unregister(kvm->arch.dbf);
out_nodbf:
	free_page((unsigned long)(kvm->arch.sca));
out_err:
	return rc;
}
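
/*
 * Note on the SCA bookkeeping below: mcn is a bitmap of configured VCPUs.
 * SIE uses MSB-0 bit numbering, hence the "63 - id" conversion when the
 * bits are manipulated with the generic (LSB-0) bitops.
 */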
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->arch.sie_block->cbrlo)
		__free_page(__pfn_to_page(
				vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
{
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
}

/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = gmap_alloc(current->mm);
		if (!vcpu->arch.gmap)
			return -ENOMEM;
		vcpu->arch.gmap->private = vcpu->kvm;
		return 0;
	}

	vcpu->arch.gmap = vcpu->kvm->arch.gmap;
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS;
	return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* Nothing todo */
}
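
/*
 * Guest floating point and access registers are switched lazily: the host
 * state is saved and the guest state loaded into the real registers when
 * the VCPU thread is scheduled in (vcpu_load), and the reverse happens
 * when it is scheduled out (vcpu_put).
 */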
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	save_fp_regs(vcpu->arch.host_fpregs.fprs);
	save_access_regs(vcpu->arch.host_acrs);
	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);
	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	restore_fp_regs(vcpu->arch.host_fpregs.fprs);
	restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	kvm_s390_clear_local_irqs(vcpu);
}

int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	return 0;
}
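
/*
 * The constants below program the SIE control block: ecb/ecb2/eca are
 * execution-control bit masks (ecb bit 0x10, set only when facilities 50
 * and 73 are installed, enables transactional execution; ecb2 bit 0x80
 * enables CMMA, with cbrlo pointing at the block list used by ESSA), and
 * the ictl bits force interception of the storage-key instructions ISKE,
 * SSKE and RRBE.
 */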
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	struct page *cbrl;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED |
						    CPUSTAT_GED);
	vcpu->arch.sie_block->ecb = 6;
	if (test_vfacility(50) && test_vfacility(73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xC1002001U;
	vcpu->arch.sie_block->fac = (int) (long) vfacilities;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
	if (kvm_enabled_cmma()) {
		cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (cbrl) {
			vcpu->arch.sie_block->ecb2 |= 0x80;
			vcpu->arch.sie_block->ecb2 &= ~0x08;
			vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
		}
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
		     (unsigned long) vcpu);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
	get_cpu_id(&vcpu->arch.cpu_id);
	vcpu->arch.cpu_id.version = 0xff;
	return 0;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_cpu_has_interrupt(vcpu);
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
	s390_vcpu_block(vcpu);
	exit_sie(vcpu);
}
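
/*
 * gmap notifier: called when the ipte-notifier-armed mapping of a guest
 * prefix page is invalidated. A MMU_RELOAD request is queued so that the
 * notifier gets re-armed via kvm_s390_handle_requests() before the VCPU
 * enters SIE again.
 */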
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			exit_sie_sync(vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
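
/*
 * ONE_REG accessors: each register is identified by a KVM_REG_S390_* id
 * and read or written through a u32/u64 sized copy to the user buffer
 * given in kvm_one_reg->addr.
 */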
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      vcpu->arch.sie_block->prefix,
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		s390_vcpu_unblock(vcpu);
	}
	return 0;
}
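
/*
 * Fallback used when the async pfault machinery is not applicable: fault
 * the page in synchronously on behalf of the guest before re-entering SIE.
 */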
static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
{
	long rc;
	hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
	struct mm_struct *mm = current->mm;

	down_read(&mm->mmap_sem);
	rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
	up_read(&mm->mmap_sem);
	return rc;
}
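
/*
 * The pfault protocol signals a pending host page fault to the guest with
 * an "init" external interrupt carrying a token and signals completion
 * with a matching "done" interrupt, so the guest can reschedule another
 * task instead of blocking while the host resolves the fault.
 */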
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;

	inti.parm64 = token;
	if (start_token) {
		inti.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_cpu_has_interrupt(vcpu))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
	if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_thread_flag(TIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm))
		kvm_s390_deliver_pending_interrupts(vcpu);

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	int rc = -1;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (exit_reason >= 0) {
		rc = 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		rc = -EREMOTE;

	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu) ||
		    (kvm_arch_fault_in_sync(vcpu) >= 0))
			rc = 0;
	}

	if (rc == -1) {
		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
		trace_kvm_s390_sie_fault(vcpu);
		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
	}

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

	if (rc == 0) {
		if (kvm_is_ucontrol(vcpu->kvm))
			/* Don't exit for host interrupts. */
			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	}

	return rc;
}

bool kvm_enabled_cmma(void)
{
	if (!MACHINE_IS_LPAR)
		return false;
	/* only enable for z10 and later */
	if (!MACHINE_HAS_EDAT1)
		return false;
	return true;
}
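
/*
 * sie64a() returns >= 0 for a normal SIE exit (any guest intercept is then
 * found in the SIE block's icptcode); a negative value means the host
 * kicked the VCPU out of SIE, e.g. because of a fault on the guest mapping.
 */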
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		preempt_disable();
		kvm_guest_enter();
		preempt_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		kvm_guest_exit();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);

	switch (kvm_run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_INTR:
	case KVM_EXIT_S390_RESET:
	case KVM_EXIT_S390_UCONTROL:
	case KVM_EXIT_S390_TSCH:
		break;
	default:
		BUG();
	}

	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	}

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
		       unsigned long n, int prefix)
{
	if (prefix)
		return copy_to_guest(vcpu, guestdest, from, n);
	else
		return copy_to_guest_absolute(vcpu, guestdest, from, n);
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
{
	unsigned char archmode = 1;
	int prefix;
	u64 clkcomp;

	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 0;
	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 1;
	} else
		prefix = 0;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
			vcpu->arch.guest_fpregs.fprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
			vcpu->run->s.regs.gprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
			&vcpu->arch.sie_block->gpsw, 16, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
			&vcpu->arch.sie_block->prefix, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area, fp_ctrl_reg),
			&vcpu->arch.guest_fpregs.fpc, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
			&vcpu->arch.sie_block->todpr, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
			&vcpu->arch.sie_block->cputm, 8, prefix))
		return -EFAULT;

	/* the save area holds the clock comparator shifted right by 8,
	 * i.e. only bits 0-55 are stored */
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
			&clkcomp, 8, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
			&vcpu->run->s.regs.acrs, 64, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area, ctrl_regs),
			&vcpu->arch.sie_block->gcr, 128, prefix))
		return -EFAULT;
	return 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Lets update our copies before we save
	 * it into the save area
	 */
	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390int);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(arg, vcpu->arch.gmap);
		if (!IS_ERR_VALUE(r))
			r = 0;
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm)
{
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
	return;
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
}

static int __init kvm_s390_init(void)
{
	int ret;

	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (ret)
		return ret;

	/*
	 * guests can ask for up to 255+1 double words, we need a full page
	 * to hold the maximum amount of facilities. On the other hand, we
	 * only set facilities that are known to work in KVM.
	 */
	vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
	if (!vfacilities) {
		kvm_exit();
		return -ENOMEM;
	}
	memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
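	/*
	 * The masks below whitelist the STFLE facility bits that KVM is
	 * willing to report to the guest; everything else is hidden even
	 * if the host has it.
	 */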
	vfacilities[0] &= 0xff82fff3f4fc2000UL;
	vfacilities[1] &= 0x005c000000000000UL;
	return 0;
}

static void __exit kvm_s390_exit(void)
{
	free_page((unsigned long) vfacilities);
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");