// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>
#include <asm/timex.h>

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
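/*
 * Exit, interrupt-delivery and instruction counters, exported via debugfs
 * with one file per counter. Each entry maps a file name to a field of
 * struct kvm_vcpu_stat.
 */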
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "deliver_io_interrupt", VCPU_STAT(deliver_io_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[9];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
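/*
 * Probe a PERFORM LOCKED OPERATION subfunction: function code bit 0x100
 * selects "test bit", for which the parameter registers are ignored and
 * condition code 0 indicates the subfunction is available.
 */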
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}
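/*
 * Transfer the gmap dirty state of all pages of a memslot into KVM's
 * dirty bitmap, bailing out early if a fatal signal is pending.
 */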
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
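/*
 * VM-wide capabilities that user space may toggle. Capabilities that
 * change the facility lists can only be enabled as long as no vCPU has
 * been created yet.
 */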
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
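/* Device attribute handlers for memory control (CMMA and memory limit) */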
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the first slot. They are reverse sorted by base_gfn, so
		 * the first slot is also the one at the end of the address
		 * space. We have verified above that at least one slot is
		 * present.
		 */
		ms = slots->memslots;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		/* We have to wait for the essa emulation to finish */
		synchronize_srcu(&kvm->srcu);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = 0;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		res = -ENXIO;
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}
static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
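/*
 * TOD (time-of-day) clock attributes. With the multiple-epoch facility
 * (facility 139), the guest TOD clock is extended by an 8-bit epoch index.
 */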
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				       struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
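/*
 * Query the adjunct processor configuration (PQAP with the QCI function
 * code) to detect APXA; the result determines the CRYCB format used below.
 */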
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}
static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
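/*
 * Note: sca_offset below staggers each new VM's basic SCA within its page,
 * so consecutively created VMs start at different 16-byte offsets.
 */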
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	if (kvm->arch.migration_state) {
		vfree(kvm->arch.migration_state->pgste_bitmap);
		kfree(kvm->arch.migration_state);
	}
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
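/*
 * Adding a vCPU beyond the basic SCA slots triggers a one-time switch to
 * the extended SCA, provided the machine has ESCA and 64-bit SCAO support.
 */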
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
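/*
 * Guest CPU timer accounting: while enabled, cputm_start holds the TOD
 * value at the last start of accounting, and a seqcount lets other threads
 * read a consistent timer value without taking a lock.
 */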
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
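
/*
 * Summary of the seqcount protocol used by the cpu timer helpers above:
 * all writers run with preemption disabled and bump cputm_seqcount to an
 * odd value while cputm/cputm_start are in flux. A reader on another
 * thread retries its read section until it observes an even (stable)
 * count; starting the retry check from (seq & ~1) forces a retry
 * whenever the initial read raced with an active writer.
 */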
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;

	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
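
/*
 * Callers that need the VCPU to stay out of SIE for longer pair this
 * with PROG_BLOCK_SIE: kvm_s390_vcpu_block() above sets the bit and then
 * calls exit_sie(), and SIE is not re-entered until
 * kvm_s390_vcpu_unblock() clears the bit again.
 */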
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
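
/*
 * Illustrative userspace sketch for the ONE_REG accessors above (not
 * part of this file's build; vcpu_fd is an assumed open VCPU file
 * descriptor):
 *
 *	struct kvm_one_reg reg;
 *	__u64 cputm;
 *
 *	reg.id = KVM_REG_S390_CPU_TIMER;
 *	reg.addr = (__u64)&cputm;
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) == 0)
 *		printf("cpu timer: %llu\n", (unsigned long long)cputm);
 */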
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMMA virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMMA virtualization if CMMA is available and
		 * CMMA has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.use_cmma))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	return 0;
}
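
/*
 * Note on the request loop above: each handled request jumps back to the
 * retry label after the PROG_REQUEST bit was cleared by
 * kvm_s390_vcpu_request_handled(), so requests that are re-armed while
 * another one is being processed are still seen before the VCPU
 * re-enters SIE.
 */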
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
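
/*
 * A note on the epoch arithmetic above: SIE presents the guest with
 * guest TOD = host TOD + epoch (modulo 2^64). When the requested guest
 * TOD lies behind the host TOD, gtod->tod - htod.tod wraps; with the
 * multiple-epoch facility (stfle 139) the borrow is propagated into the
 * epoch index (epdx) so the extended guest clock stays consistent.
 */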
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
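
/*
 * Summary of the gating above: a pfault token is only handed to the
 * common async_pf code if userspace configured one, the guest PSW
 * matches the configured select/compare mask, external interrupts are
 * enabled, no interrupt is already pending, the corresponding subclass
 * bit in CR0 (the 0x200ul test) is set and the gmap has pfault enabled;
 * the token itself is (re-)read from guest real storage.
 */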
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
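
/*
 * Note on the loop above: interrupts are only disabled around the
 * guest_enter/guest_exit transitions, not across sie64a() itself. The
 * software cpu timer accounting is switched off while the guest runs in
 * SIE because the hardware then steps the guest CPU timer directly; the
 * helpers only account the time spent outside of SIE.
 */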
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}
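
/*
 * The fprs/vrs handover in sync_regs() implements the lazy FPU switch:
 * the host (userspace) register state is parked in vcpu->arch.host_fpregs
 * and current->thread.fpu is redirected to the guest copy in the kvm_run
 * area, so the common lazy save/restore machinery transparently operates
 * on guest registers until store_regs() flips the pointers back.
 */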
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
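
/*
 * Layout note (following the header comment above): for
 * KVM_S390_STORE_STATUS_NOADDR, gpa is set to 0 and each field is
 * written at its __LC_*_SAVE_AREA offset, i.e. into the architected
 * save area starting at absolute address 0x1200; for
 * KVM_S390_STORE_STATUS_PREFIXED the same offsets are applied relative
 * to the VCPU's prefix page.
 */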
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
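
/*
 * IBS note for the start/stop functions above: the facility only pays
 * off while exactly one VCPU is running, so starting a second VCPU
 * disables IBS on all VCPUs, and stopping the next-to-last VCPU
 * re-enables it on the sole remaining runner.
 */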
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
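
/*
 * Illustrative userspace sketch for KVM_S390_MEM_OP (not part of this
 * file's build; vcpu_fd and buf are assumptions for the example):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */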
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
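
/*
 * Mechanically: (sclp.hmfai << i * 2) >> 30 extracts the i-th 2-bit
 * field (counted from the most significant end) of hmfai, and the
 * 48-bit mask is shifted right in 16-bit steps accordingly, so
 * doubleword i of the facility list loses its first 16, 32, 48 or all
 * 64 bits.
 */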
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");