/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
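/*
 * Illustrative userspace sketch (editor's assumption, not part of this
 * file): reading guest memory through the KVM_S390_MEM_OP vcpu ioctl that
 * is handled near the end of this file. The vcpu_fd variable and the
 * guest address are invented for the example; error handling is omitted.
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	char buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,
 *		.size  = sizeof(buf),	   must not exceed MEM_OP_MAX_SIZE
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (unsigned long)buf,
 *	};
 *	int rc = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */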
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
};
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,
	0x005e800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
	/* every s390 is virtualization enabled ;-) */

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
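/*
 * Worked example (editor's illustration): KVM derives the guest TOD as
 * guest_tod = host_tod + epoch. When stop_machine() steps the host TOD
 * by delta, the handler below keeps the guest view stable because
 *
 *	(host_tod + delta) + (epoch - delta) == host_tod + epoch
 *
 * which is exactly why delta is subtracted from every epoch field.
 */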
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
	struct kvm_vcpu *vcpu;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);

void kvm_arch_hardware_unsetup(void)
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);

int kvm_arch_init(void *opaque)
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
void kvm_arch_exit(void)
	debug_unregister(kvm_s390_dbf);

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_MEM_OP:
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
	case KVM_CAP_S390_VECTOR_REGISTERS:

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	up_read(&gmap->mm->mmap_sem);

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	mutex_lock(&kvm->slots_lock);

	if (log->slot >= KVM_USER_MEM_SLOTS)

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);

	if (!memslot->dirty_bitmap)

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);

	/* Clear the dirty log */
	n = kvm_dirty_bitmap_bytes(memslot);
	memset(memslot->dirty_bitmap, 0, n);

	mutex_unlock(&kvm->slots_lock);
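/*
 * Illustrative userspace sketch (editor's assumption, not part of this
 * file): fetching the dirty bitmap for memslot 0 via KVM_GET_DIRTY_LOG.
 * vm_fd and NPAGES are invented for the example; the caller must supply
 * one bit of bitmap space per page of the memslot.
 *
 *	unsigned long bitmap[NPAGES / (8 * sizeof(unsigned long))];
 *	struct kvm_dirty_log log = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap,
 *	};
 *	int rc = ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */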
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac->mask, 129);
			set_kvm_facility(kvm->arch.model.fac->list, 129);
		}
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.gmap->asce_end);
		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		/* enable CMMA only for z10 and later (EDAT_1) */
		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
		}
		mutex_unlock(&kvm->lock);
	case KVM_S390_VM_MEM_CLR_CMMA:
		if (!kvm->arch.use_cmma)

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))

		if (get_user(new_limit, (u64 __user *)attr->addr))

		if (new_limit > kvm->arch.gmap->asce_end)

		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			gmap_free(kvm->arch.gmap);
			kvm->arch.gmap = new;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
	}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_vcpu *vcpu;

	if (!test_kvm_facility(kvm, 76))

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		mutex_unlock(&kvm->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);

	mutex_unlock(&kvm->lock);
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))

	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_processor *proc;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);

	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
		       sizeof(struct cpuid));
		kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac->list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	mutex_unlock(&kvm->lock);

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_processor *proc;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);

	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_machine *mach;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);

	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
	unsigned long curkey;

	if (args->flags != 0)

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
	if (args->flags != 0)

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
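/*
 * Illustrative userspace sketch (editor's assumption, not part of this
 * file): installing zeroed storage keys for the first 16 guest frames
 * with the KVM_S390_SET_SKEYS vm ioctl. vm_fd is invented for the
 * example; note that bit 0 of each key is reserved and must be clear.
 *
 *	__u8 keys[16] = { 0 };
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 16,
 *		.skeydata_addr = (unsigned long)keys,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_SET_SKEYS, &args);
 */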
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;

	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
		r = kvm_s390_inject_vm(kvm, &s390int);
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		if (copy_from_user(&cap, argp, sizeof(cap)))
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
	case KVM_SET_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_set_attr(kvm, &attr);
	case KVM_GET_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_get_attr(kvm, &attr);
	case KVM_HAS_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_has_attr(kvm, &attr);
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
		r = kvm_s390_get_skeys(kvm, &args);
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
		r = kvm_s390_set_skeys(kvm, &args);
static int kvm_s390_query_ap_config(u8 *config)
	u32 fcn_code = 0x04000000UL;

	memset(config, 0, 128);
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"

static int kvm_s390_apxa_installed(void)
	if (test_facility(2) && test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		return config[0] & 0x40;

static void kvm_s390_set_crycb_format(struct kvm *kvm)
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
	cpu_id->version = 0xff;

static int kvm_s390_crypto_init(struct kvm *kvm)
	if (!test_kvm_facility(kvm, 76))

	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
					 GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.crypto.crycb)

	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
	char debug_name[16];
	static unsigned long sca_offset;

#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
#endif

	rc = s390_enable_sie();

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);

	spin_lock(&kvm_lock);
	if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	/*
	 * The architectural maximum number of facilities is 16 kbit. To store
	 * this amount, 2 kbyte of memory is required. Thus we need a full
	 * page to hold the guest facility list (arch.model.fac->list) and the
	 * facility mask (arch.model.fac->mask). Its address has to fit in
	 * 31 bits and be word aligned.
	 */
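	/*
	 * Sizing check (editor's illustration): 16 kbit = 2048 bytes per
	 * facility array, and mask + list together need 2 * 2048 = 4096
	 * bytes, i.e. exactly the one zeroed page allocated below.
	 */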
	kvm->arch.model.fac =
		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.model.fac)

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac->mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	if (kvm_s390_crypto_init(kvm) < 0)

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
		if (!kvm->arch.gmap)
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);

out_err:
	kfree(kvm->arch.crypto.crycb);
	free_page((unsigned long)kvm->arch.model.fac);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)(kvm->arch.sca));
	KVM_EVENT(3, "creation of vm failed: %d", rc);

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);

static void kvm_free_vcpus(struct kvm *kvm)
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);

void kvm_arch_destroy_vm(struct kvm *kvm)
	kvm_free_vcpus(kvm);
	free_page((unsigned long)kvm->arch.model.fac);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	kfree(kvm->arch.crypto.crycb);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%p destroyed", kvm);

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
	vcpu->arch.gmap->private = vcpu->kvm;

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 129))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);
/*
 * Backs up the current FP/VX register save area to a particular
 * destination. Used to switch between different register save
 * areas.
 */
static inline void save_fpu_to(struct fpu *dst)
	dst->fpc = current->thread.fpu.fpc;
	dst->flags = current->thread.fpu.flags;
	dst->regs = current->thread.fpu.regs;

/*
 * Switches the FP/VX register save area from which register
 * contents are lazily restored.
 */
static inline void load_fpu_from(struct fpu *from)
	current->thread.fpu.fpc = from->fpc;
	current->thread.fpu.flags = from->flags;
	current->thread.fpu.regs = from->regs;
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	/* Save host register state */
	save_fpu_to(&vcpu->arch.host_fpregs);

	if (test_kvm_facility(vcpu->kvm, 129)) {
		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
		current->thread.fpu.flags = FPU_USE_VX;
		/*
		 * Use the register save area in the SIE-control block
		 * for register restore and save in kvm_arch_vcpu_put()
		 */
		current->thread.fpu.vxrs =
			(__vector128 *)&vcpu->run->s.regs.vrs;
		/* Always enable the vector extension for KVM */
	} else
		load_fpu_from(&vcpu->arch.guest_fpregs);

	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);
	if (test_kvm_facility(vcpu->kvm, 129))
		/*
		 * kvm_arch_vcpu_load() set up the register save area to
		 * &vcpu->run->s.regs.vrs and, thus, the vector registers
		 * are already saved. Only the floating-point control must be
		 * stored.
		 */
		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	load_fpu_from(&vcpu->arch.host_fpregs);

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
	mutex_lock(&vcpu->kvm->lock);
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
	if (!test_kvm_facility(vcpu->kvm, 76))

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.cpu_id = model->cpu_id;
	vcpu->arch.sie_block->ibc = model->ibc;
	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb = 6;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xC1002000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);

	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;

	if (id >= KVM_MAX_VCPUS)

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {

		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	/*
	 * Allocate a save area for floating-point registers. If the vector
	 * extension is available, register contents are saved in the SIE
	 * control block. The allocated save area is still required in
	 * particular places, for example, in kvm_s390_vcpu_store_status().
	 */
	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
					       GFP_KERNEL);
	if (!vcpu->arch.guest_fpregs.fprs) {
		goto out_free_sie_block;

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
	return kvm_s390_vcpu_has_irq(vcpu, 0);

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
	/* kvm common code refers to this, but never calls it */

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
	kvm_s390_vcpu_initial_reset(vcpu);

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	if (test_fp_ctl(fpu->fpc))

	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	load_fpu_from(&vcpu->arch.guest_fpregs);

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
	if (!is_vcpu_stopped(vcpu))

	vcpu->run->psw_mask = psw.mask;
	vcpu->run->psw_addr = psw.addr;

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
	return -EINVAL; /* not implemented yet */

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */

static bool ibs_enabled(struct kvm_vcpu *vcpu)
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
	struct kvm_vcpu *vcpu;

	mutex_lock(&kvm->lock);
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
	/* s390 will always inject the page directly */

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
	struct kvm_arch_async_pf arch;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
	if (psw_extint_disabled(vcpu))
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
	if (!vcpu->arch.gmap->pfault_enabled)

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);

	rc = kvm_s390_handle_requests(vcpu);

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
	psw_t *psw = &vcpu->arch.sie_block->gpsw;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
	if (rc)
		return kvm_s390_inject_prog_cond(vcpu, rc);
	psw->addr = __rewind_psw(*psw, -insn_length(opcode));

	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
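/*
 * Editor's note on the rewind above: the two leftmost bits of the first
 * opcode byte encode the instruction length on s390 (00 -> 2, 01/10 -> 4,
 * 11 -> 6 bytes). Passing -insn_length(opcode) to __rewind_psw() rewinds
 * by a negative amount, i.e. it advances the PSW past the faulting
 * instruction, so the injected addressing exception appears suppressing.
 */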
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	if (exit_reason >= 0) {
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu)) {
		} else {
			gpa_t gpa = current->thread.gmap_addr;

			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
		}
	}

		rc = vcpu_post_run_fault_in_sie(vcpu);

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

	if (kvm_is_ucontrol(vcpu->kvm))
		/* Don't exit for host interrupts. */
		rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
	else
		rc = kvm_handle_sie_intercept(vcpu);
static int __vcpu_run(struct kvm_vcpu *vcpu)
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 * against removal.
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there must
		 * be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2153 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2157 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2159 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2160 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2161 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2162 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2163 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2164 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2165 /* some control register changes require a tlb flush */
2166 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2168 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2169 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2170 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2171 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2172 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2173 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2175 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2176 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2177 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2178 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2179 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2180 kvm_clear_async_pf_completion_queue(vcpu);
2182 kvm_run->kvm_dirty_regs = 0;
2185 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2187 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2188 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2189 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2190 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2191 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2192 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2193 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2194 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2195 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2196 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2197 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2198 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
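/*
 * Illustrative userspace sketch (editor's assumption, not part of this
 * file): handing a new prefix to sync_regs() above through the synced
 * register interface before KVM_RUN. "run" is the mmap'ed kvm_run of an
 * (invented) vcpu_fd.
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 */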
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);

	sync_regs(vcpu, kvm_run);

	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;

	if (rc == -EREMOTE) {
		/*
		 * intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler
		 */

	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
	unsigned char archmode = 1;

	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
		gpa = SAVE_AREA_BASE;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
	}
	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
			     vcpu->arch.guest_fpregs.fprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
			      &vcpu->arch.sie_block->gpsw, 16);
	px = kvm_s390_get_prefix(vcpu);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
			      &px, 4);
	rc |= write_guest_abs(vcpu,
			      gpa + offsetof(struct save_area, fp_ctrl_reg),
			      &vcpu->arch.guest_fpregs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
			      &vcpu->arch.sie_block->cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	if (test_kvm_facility(vcpu->kvm, 129)) {
		/*
		 * If the vector extension is available, the vector registers
		 * which overlap with the floating-point registers are saved
		 * in the SIE-control block. Hence, extract the floating-point
		 * registers and the FPC value and store them in the
		 * guest_fpregs structure.
		 */
		WARN_ON(!is_vx_task(current));	/* XXX remove later */
		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
				 current->thread.fpu.vxrs);
	} else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
	if (!test_kvm_facility(vcpu->kvm, 129))

	/*
	 * The guest VXRS are in the host VXRS due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

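/*
 * The helpers above share one pattern: kvm_check_request() consumes a
 * still-pending request for the opposite IBS state, so ENABLE and DISABLE
 * can never be outstanding at the same time, and kvm_s390_sync_request()
 * then queues the new request and kicks the VCPU out of SIE to handle it.
 */
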
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

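/*
 * Worked example for the start path: going from zero to one running VCPU
 * enables IBS on the starter (the single-CPU speedup), while going from one
 * to two disables IBS everywhere, since the facility only pays off while
 * exactly one VCPU is running.
 */
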
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

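/*
 * The stop path mirrors the start path: once this VCPU is marked STOPPED,
 * a single remaining runnable VCPU gets IBS enabled again, restoring the
 * "exactly one running VCPU" fast path.
 */
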
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

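/*
 * Illustrative only, not part of the original file: userspace would enable
 * the capability above roughly like this (vcpu_fd is an assumed vcpu fd):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */
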
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

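/*
 * Illustrative only, not part of the original file: a minimal userspace
 * sketch for the memop handler above; vcpu_fd and buf are assumptions. The
 * fields mirror struct kvm_s390_mem_op from the uapi header:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * Adding KVM_S390_MEMOP_F_CHECK_ONLY to op.flags performs only the address
 * translation check without copying any data.
 */
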
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

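/*
 * Note on the KVM_S390_SET_IRQ_STATE length check above: irq_state.len must
 * be a non-zero multiple of sizeof(struct kvm_s390_irq) and no larger than
 * VCPU_IRQS_MAX_BUF, i.e. the buffer must hold a whole number of irq
 * descriptors and at most KVM_MAX_VCPUS + LOCAL_IRQS of them.
 */
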
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
	    && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

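/*
 * Illustrative only, not part of the original file (ucontrol VMs):
 * userspace reaches the fault handler above by mmap()ing the vcpu fd at the
 * SIE page offset, e.g.
 *
 *	mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	     vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * PAGE_SIZE);
 *
 * which maps the vcpu's SIE control block; any other offset gets SIGBUS.
 */
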
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	 * segment boundary (1 MB). The memory in userland may be fragmented
	 * into various different vmas. It is okay to mmap() and munmap()
	 * stuff in this slot after doing this call at any time.
	 */
	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}

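/*
 * Worked example for the checks above: userspace_addr = 0x100000 (1 MB)
 * with memory_size = 0x4000000 (64 MB) passes, since both values have the
 * low 20 bits (0xfffful) clear; userspace_addr = 0x180000 fails with
 * -EINVAL because 0x180000 & 0xfffff = 0x80000.
 */
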
void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static int __init kvm_s390_init(void)
{
	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");