1 // SPDX-License-Identifier: GPL-2.0
3 * hosting zSeries kernel virtual machines
5 * Copyright IBM Corp. 2008, 2009
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only)
9 * as published by the Free Software Foundation.
11 * Author(s): Carsten Otte <cotte@de.ibm.com>
12 * Christian Borntraeger <borntraeger@de.ibm.com>
13 * Heiko Carstens <heiko.carstens@de.ibm.com>
14 * Christian Ehrhardt <ehrhardt@de.ibm.com>
15 * Jason J. Herne <jjherne@us.ibm.com>
18 #include <linux/compiler.h>
19 #include <linux/err.h>
21 #include <linux/hrtimer.h>
22 #include <linux/init.h>
23 #include <linux/kvm.h>
24 #include <linux/kvm_host.h>
25 #include <linux/mman.h>
26 #include <linux/module.h>
27 #include <linux/moduleparam.h>
28 #include <linux/random.h>
29 #include <linux/slab.h>
30 #include <linux/timer.h>
31 #include <linux/vmalloc.h>
32 #include <linux/bitmap.h>
33 #include <linux/sched/signal.h>
34 #include <linux/string.h>
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
39 #include <asm/pgtable.h>
42 #include <asm/switch_to.h>
45 #include <asm/cpacf.h>
46 #include <asm/timex.h>
50 #define KMSG_COMPONENT "kvm-s390"
52 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
54 #define CREATE_TRACE_POINTS
56 #include "trace-s390.h"
58 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
/*
 * Byte size of a buffer with room for (KVM_MAX_VCPUS + LOCAL_IRQS) entries
 * of struct kvm_s390_irq.
 */
60 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
61 (KVM_MAX_VCPUS + LOCAL_IRQS))
/*
 * Expands to two comma-separated initializers for a statistics table entry:
 * the offset of stat.x inside struct kvm_vcpu, followed by KVM_STAT_VCPU.
 */
63 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Table of per-vCPU statistics. Each entry pairs a name string with
 * VCPU_STAT(field), i.e. the offset of stat.<field> in struct kvm_vcpu plus
 * KVM_STAT_VCPU.
 *
 * The name string usually equals the field name, but not always (for example
 * "userspace_handled" -> exit_userspace, "instruction_sigp_set_arch" ->
 * instruction_sigp_arch), so do not derive one from the other mechanically.
 */
65 struct kvm_stats_debugfs_item debugfs_entries[] = {
66 { "userspace_handled", VCPU_STAT(exit_userspace) },
67 { "exit_null", VCPU_STAT(exit_null) },
68 { "exit_validity", VCPU_STAT(exit_validity) },
69 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
70 { "exit_external_request", VCPU_STAT(exit_external_request) },
71 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
72 { "exit_instruction", VCPU_STAT(exit_instruction) },
73 { "exit_pei", VCPU_STAT(exit_pei) },
74 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
75 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
76 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
77 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
78 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
79 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
80 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
81 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
82 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
83 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
84 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
85 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
87 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
89 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
93 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
94 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
95 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
96 { "instruction_spx", VCPU_STAT(instruction_spx) },
97 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
98 { "instruction_stap", VCPU_STAT(instruction_stap) },
99 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
100 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
101 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
102 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
103 { "instruction_essa", VCPU_STAT(instruction_essa) },
104 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
105 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
106 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
107 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
108 { "instruction_sie", VCPU_STAT(instruction_sie) },
109 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
110 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
111 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
112 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
113 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
114 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
115 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
116 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
117 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
118 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
119 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
120 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
121 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
122 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
123 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
124 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
125 { "diagnose_10", VCPU_STAT(diagnose_10) },
126 { "diagnose_44", VCPU_STAT(diagnose_44) },
127 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
128 { "diagnose_258", VCPU_STAT(diagnose_258) },
129 { "diagnose_308", VCPU_STAT(diagnose_308) },
130 { "diagnose_500", VCPU_STAT(diagnose_500) },
/*
 * Host extended TOD clock value as filled in by get_tod_clock_ext().
 * kvm_s390_get_tod_clock_ext() reads its .epoch_idx and .tod members.
 * NOTE(review): the member list is not visible here - confirm the layout
 * against the full source before relying on it.
 */
134 struct kvm_s390_tod_clock_ext {
140 /* allow nested virtualization in KVM (if enabled by user space) */
/*
 * Exposed as an int module parameter with S_IRUGO permissions.
 * kvm_s390_cpu_feat_init() tests !nested as part of the check that precedes
 * allowing KVM_S390_VM_CPU_FEAT_SIEF2 and the features listed after it.
 */
142 module_param(nested, int, S_IRUGO);
143 MODULE_PARM_DESC(nested, "Nested virtualization support");
145 /* upper facilities limit for kvm */
/* 16 unsigned longs, initialized from the FACILITIES_KVM initializer list. */
146 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
/*
 * Return the number of elements in kvm_s390_fac_list_mask.
 *
 * The BUILD_BUG_ON makes compilation fail if that array ever holds more
 * elements than S390_ARCH_FAC_MASK_SIZE_U64.
 */
148 unsigned long kvm_s390_fac_list_mask_size(void)
150 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
151 return ARRAY_SIZE(kvm_s390_fac_list_mask);
154 /* available cpu features supported by kvm */
/* Bits are set through allow_cpu_feat() from kvm_s390_cpu_feat_init(). */
155 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
156 /* available subfunctions indicated via query / "test bit" */
/* Filled in by kvm_s390_cpu_feat_init(); copied out by kvm_s390_get_machine_subfunc(). */
157 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
/* Both notifiers are registered in kvm_arch_hardware_setup() and removed in kvm_arch_hardware_unsetup(). */
159 static struct gmap_notifier gmap_notifier;
160 static struct gmap_notifier vsie_gmap_notifier;
/* Debug feature handle; created in kvm_arch_init() and released in kvm_arch_exit(). */
161 debug_info_t *kvm_s390_dbf;
163 /* Section: not file related */
/*
 * Arch hook for enabling virtualization on the hardware. The only visible
 * content is the remark below that every s390 machine is virtualization
 * enabled, i.e. there is no enabling step to perform here.
 */
164 int kvm_arch_hardware_enable(void)
166 /* every s390 is virtualization enabled ;-) */
170 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
174 * This callback is executed during stop_machine(). All CPUs are therefore
175 * temporarily stopped. In order not to change guest behavior, we have to
176 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
177 * so a CPU won't be stopped while calculating with the epoch.
/*
 * Notifier callback (installed via kvm_clock_notifier). @v is interpreted as
 * a pointer to an unsigned long long delta. For every VM on vm_list the delta
 * is subtracted from kvm->arch.epoch, and for every vCPU of that VM:
 *  - subtracted from the SIE block epoch,
 *  - added to cputm_start when cputm_enabled is set,
 *  - subtracted from the vsie block epoch when a vsie_block is present.
 */
179 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
183 struct kvm_vcpu *vcpu;
185 unsigned long long *delta = v;
187 list_for_each_entry(kvm, &vm_list, vm_list) {
188 kvm->arch.epoch -= *delta;
189 kvm_for_each_vcpu(i, vcpu, kvm) {
190 vcpu->arch.sie_block->epoch -= *delta;
/* cputm_start moves in the opposite direction of the epochs */
191 if (vcpu->arch.cputm_enabled)
192 vcpu->arch.cputm_start += *delta;
193 if (vcpu->arch.vsie_block)
194 vcpu->arch.vsie_block->epoch -= *delta;
/*
 * Notifier block that dispatches to kvm_clock_sync(); it is added to
 * s390_epoch_delta_notifier in kvm_arch_hardware_setup().
 */
200 static struct notifier_block kvm_clock_notifier = {
201 .notifier_call = kvm_clock_sync,
/*
 * One-time hardware setup: install the callbacks of the two gmap PTE
 * notifiers (regular and vsie), register both, and hook kvm_clock_notifier
 * into the s390_epoch_delta_notifier chain.
 * kvm_arch_hardware_unsetup() undoes all three registrations.
 */
204 int kvm_arch_hardware_setup(void)
206 gmap_notifier.notifier_call = kvm_gmap_notifier;
207 gmap_register_pte_notifier(&gmap_notifier);
208 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
209 gmap_register_pte_notifier(&vsie_gmap_notifier);
210 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
211 &kvm_clock_notifier);
/*
 * Counterpart of kvm_arch_hardware_setup(): unregister both gmap PTE
 * notifiers and remove kvm_clock_notifier from s390_epoch_delta_notifier.
 */
215 void kvm_arch_hardware_unsetup(void)
217 gmap_unregister_pte_notifier(&gmap_notifier);
218 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
219 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
220 &kvm_clock_notifier);
/*
 * Mark CPU feature @nr as available by setting its bit in
 * kvm_s390_available_cpu_feat with set_bit_inv().
 */
223 static void allow_cpu_feat(unsigned long nr)
225 set_bit_inv(nr, kvm_s390_available_cpu_feat);
/*
 * Issue the "test bit" form for function code @nr: register 0 is loaded with
 * @nr OR-ed with 0x100.
 * NOTE(review): presumably the result says whether function @nr is
 * installed (kvm_s390_cpu_feat_init() records one bit per code in
 * kvm_s390_available_subfunc.plo) - confirm against the inline assembly,
 * which is not visible here.
 */
228 static inline int plo_test_bit(unsigned char nr)
230 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
234 /* Parameter registers are ignored for "test bit" */
/*
 * Populate kvm_s390_available_subfunc and kvm_s390_available_cpu_feat.
 *
 * Subfunction masks are queried per facility:
 *   facility 28  -> ptff
 *   facility 17  -> kmac, kmc, km, kimd, klmd
 *   facility 76  -> pckmo
 *   facility 77  -> kmctr, kmf, kmo, pcc
 *   facility 57  -> CPACF_PRNO query, stored in the field named ppno
 *   facility 146 -> kma
 * CPU features are then enabled via allow_cpu_feat().
 */
244 static void kvm_s390_cpu_feat_init(void)
/* one bit per function code 0..255; byte index is i >> 3, bit 0x80 >> (i & 7) */
248 for (i = 0; i < 256; ++i) {
250 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
253 if (test_facility(28)) /* TOD-clock steering */
254 ptff(kvm_s390_available_subfunc.ptff,
255 sizeof(kvm_s390_available_subfunc.ptff),
258 if (test_facility(17)) { /* MSA */
259 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
260 kvm_s390_available_subfunc.kmac);
261 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
262 kvm_s390_available_subfunc.kmc);
263 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
264 kvm_s390_available_subfunc.km);
265 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
266 kvm_s390_available_subfunc.kimd);
267 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.klmd);
270 if (test_facility(76)) /* MSA3 */
271 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
272 kvm_s390_available_subfunc.pckmo);
273 if (test_facility(77)) { /* MSA4 */
274 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
275 kvm_s390_available_subfunc.kmctr);
276 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
277 kvm_s390_available_subfunc.kmf);
278 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
279 kvm_s390_available_subfunc.kmo);
280 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
281 kvm_s390_available_subfunc.pcc);
/* the query constant is CPACF_PRNO while the destination field is named ppno */
283 if (test_facility(57)) /* MSA5 */
284 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
285 kvm_s390_available_subfunc.ppno);
287 if (test_facility(146)) /* MSA8 */
288 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
289 kvm_s390_available_subfunc.kma);
/* ESOP is offered independently of the nested-virtualization check below */
291 if (MACHINE_HAS_ESOP)
292 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
294 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
295 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
297 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
298 !test_facility(3) || !nested)
300 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
/*
 * NOTE(review): sclp.has_64bscao is already part of the requirement check
 * above, so this test looks redundant at this point - confirm.
 */
301 if (sclp.has_64bscao)
302 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
304 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
306 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
308 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
310 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
312 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
314 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
316 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
318 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
319 * all skey handling functions read/set the skey from the PGSTE
320 * instead of the real storage key.
322 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
323 * pages being detected as preserved although they are resident.
325 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
326 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
328 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
329 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
330 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
332 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
333 * cannot easily shadow the SCA because of the ipte lock.
/*
 * Arch init: create the "kvm-trace" debug feature, attach the sprintf view
 * to it (the debug feature is unregistered again if attaching fails),
 * initialize the CPU feature/subfunction tables and finally register the
 * FLIC device ops. The result of kvm_register_device_ops() is returned.
 */
337 int kvm_arch_init(void *opaque)
339 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
343 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
344 debug_unregister(kvm_s390_dbf);
348 kvm_s390_cpu_feat_init();
350 /* Register floating interrupt controller interface. */
351 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
/* Arch exit: release the debug feature created in kvm_arch_init(). */
354 void kvm_arch_exit(void)
356 debug_unregister(kvm_s390_dbf);
359 /* Section: device related */
/*
 * Device-level ioctl handler. KVM_S390_ENABLE_SIE is forwarded to
 * s390_enable_sie() and its result returned.
 */
360 long kvm_arch_dev_ioctl(struct file *filp,
361 unsigned int ioctl, unsigned long arg)
363 if (ioctl == KVM_S390_ENABLE_SIE)
364 return s390_enable_sie();
/*
 * Report the value of capability @ext in r.
 *
 * A long list of capabilities shares one case body. The others compute r
 * individually:
 *   NR_VCPUS / MAX_VCPUS  starts from KVM_S390_BSCA_CPU_SLOTS, with
 *                         alternatives depending on kvm_s390_use_sca_entries()
 *                         and on sclp.has_esca && sclp.has_64bscao
 *   NR_MEMSLOTS           KVM_USER_MEM_SLOTS
 *   S390_COW              MACHINE_HAS_ESOP
 *   S390_RI               test_facility(64)
 *   S390_GS               test_facility(133)
 */
368 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
373 case KVM_CAP_S390_PSW:
374 case KVM_CAP_S390_GMAP:
375 case KVM_CAP_SYNC_MMU:
/* only offered when user-controlled VMs are configured in */
376 #ifdef CONFIG_KVM_S390_UCONTROL
377 case KVM_CAP_S390_UCONTROL:
379 case KVM_CAP_ASYNC_PF:
380 case KVM_CAP_SYNC_REGS:
381 case KVM_CAP_ONE_REG:
382 case KVM_CAP_ENABLE_CAP:
383 case KVM_CAP_S390_CSS_SUPPORT:
384 case KVM_CAP_IOEVENTFD:
385 case KVM_CAP_DEVICE_CTRL:
386 case KVM_CAP_ENABLE_CAP_VM:
387 case KVM_CAP_S390_IRQCHIP:
388 case KVM_CAP_VM_ATTRIBUTES:
389 case KVM_CAP_MP_STATE:
390 case KVM_CAP_IMMEDIATE_EXIT:
391 case KVM_CAP_S390_INJECT_IRQ:
392 case KVM_CAP_S390_USER_SIGP:
393 case KVM_CAP_S390_USER_STSI:
394 case KVM_CAP_S390_SKEYS:
395 case KVM_CAP_S390_IRQ_STATE:
396 case KVM_CAP_S390_USER_INSTR0:
397 case KVM_CAP_S390_CMMA_MIGRATION:
398 case KVM_CAP_S390_AIS:
399 case KVM_CAP_S390_AIS_MIGRATION:
402 case KVM_CAP_S390_MEM_OP:
/* both vCPU-count capabilities report the same value */
405 case KVM_CAP_NR_VCPUS:
406 case KVM_CAP_MAX_VCPUS:
407 r = KVM_S390_BSCA_CPU_SLOTS;
408 if (!kvm_s390_use_sca_entries())
410 else if (sclp.has_esca && sclp.has_64bscao)
411 r = KVM_S390_ESCA_CPU_SLOTS;
413 case KVM_CAP_NR_MEMSLOTS:
414 r = KVM_USER_MEM_SLOTS;
416 case KVM_CAP_S390_COW:
417 r = MACHINE_HAS_ESOP;
419 case KVM_CAP_S390_VECTOR_REGISTERS:
422 case KVM_CAP_S390_RI:
423 r = test_facility(64);
425 case KVM_CAP_S390_GS:
426 r = test_facility(133);
/*
 * Transfer dirty state from the guest mapping into KVM's dirty log for
 * @memslot: each gfn is translated to its host virtual address, and when
 * test_and_clear_guest_dirty() reports it dirty the gfn is passed to
 * mark_page_dirty(). A pending fatal signal for the current task is checked
 * on every iteration.
 */
434 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
435 struct kvm_memory_slot *memslot)
437 gfn_t cur_gfn, last_gfn;
438 unsigned long address;
439 struct gmap *gmap = kvm->arch.gmap;
441 /* Loop over all guest pages */
/*
 * NOTE(review): last_gfn is base_gfn + npages, i.e. one past the slot's
 * final gfn, yet the loop condition below is "<=", so the body also runs
 * for that gfn - confirm whether this is intended.
 */
442 last_gfn = memslot->base_gfn + memslot->npages;
443 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
444 address = gfn_to_hva_memslot(memslot, cur_gfn);
446 if (test_and_clear_guest_dirty(gmap->mm, address))
447 mark_page_dirty(kvm, cur_gfn);
448 if (fatal_signal_pending(current))
454 /* Section: vm related */
455 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
458 * Get (and clear) the dirty memory log for a memory slot.
/*
 * Steps, all but the ucontrol check under kvm->slots_lock:
 *  1. ucontrol VMs are special-cased up front.
 *  2. log->slot is range-checked against KVM_USER_MEM_SLOTS.
 *  3. The memslot is looked up and checked for a dirty bitmap.
 *  4. kvm_s390_sync_dirty_log() refreshes the bitmap, then
 *     kvm_get_dirty_log() hands it out and reports is_dirty.
 *  5. The slot's dirty bitmap is zeroed over kvm_dirty_bitmap_bytes().
 */
460 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
461 struct kvm_dirty_log *log)
465 struct kvm_memslots *slots;
466 struct kvm_memory_slot *memslot;
469 if (kvm_is_ucontrol(kvm))
472 mutex_lock(&kvm->slots_lock);
475 if (log->slot >= KVM_USER_MEM_SLOTS)
478 slots = kvm_memslots(kvm);
479 memslot = id_to_memslot(slots, log->slot);
481 if (!memslot->dirty_bitmap)
/* pull the current dirty state in before reporting it */
484 kvm_s390_sync_dirty_log(kvm, memslot);
485 r = kvm_get_dirty_log(kvm, log, &is_dirty);
489 /* Clear the dirty log */
491 n = kvm_dirty_bitmap_bytes(memslot);
492 memset(memslot->dirty_bitmap, 0, n);
496 mutex_unlock(&kvm->slots_lock);
/*
 * Issue a synchronous KVM_REQ_ICPT_OPEREXC request to every vCPU of @kvm.
 * Called when KVM_CAP_S390_USER_INSTR0 is enabled.
 */
500 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
503 struct kvm_vcpu *vcpu;
505 kvm_for_each_vcpu(i, vcpu, kvm) {
506 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
/*
 * Enable a VM capability selected by @cap.
 *
 * IRQCHIP, USER_SIGP, USER_STSI and USER_INSTR0 simply set a flag in
 * kvm->arch (USER_INSTR0 additionally notifies all vCPUs).
 * VECTOR_REGISTERS, RI, AIS and GS modify the facility mask and list of the
 * CPU model; they take kvm->lock and first check whether vCPUs already exist.
 * Each facility-changing case logs "(not available)" when r is non-zero and
 * "(success)" otherwise.
 */
510 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
518 case KVM_CAP_S390_IRQCHIP:
519 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
520 kvm->arch.use_irqchip = 1;
523 case KVM_CAP_S390_USER_SIGP:
524 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
525 kvm->arch.user_sigp = 1;
528 case KVM_CAP_S390_VECTOR_REGISTERS:
529 mutex_lock(&kvm->lock);
530 if (kvm->created_vcpus) {
532 } else if (MACHINE_HAS_VX) {
533 set_kvm_facility(kvm->arch.model.fac_mask, 129);
534 set_kvm_facility(kvm->arch.model.fac_list, 129);
/* facilities 134 and 135 are added only when the host has them */
535 if (test_facility(134)) {
536 set_kvm_facility(kvm->arch.model.fac_mask, 134);
537 set_kvm_facility(kvm->arch.model.fac_list, 134);
539 if (test_facility(135)) {
540 set_kvm_facility(kvm->arch.model.fac_mask, 135);
541 set_kvm_facility(kvm->arch.model.fac_list, 135);
546 mutex_unlock(&kvm->lock);
547 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
548 r ? "(not available)" : "(success)");
550 case KVM_CAP_S390_RI:
552 mutex_lock(&kvm->lock);
553 if (kvm->created_vcpus) {
555 } else if (test_facility(64)) {
556 set_kvm_facility(kvm->arch.model.fac_mask, 64);
557 set_kvm_facility(kvm->arch.model.fac_list, 64);
560 mutex_unlock(&kvm->lock);
561 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
562 r ? "(not available)" : "(success)");
564 case KVM_CAP_S390_AIS:
565 mutex_lock(&kvm->lock);
566 if (kvm->created_vcpus) {
569 set_kvm_facility(kvm->arch.model.fac_mask, 72);
570 set_kvm_facility(kvm->arch.model.fac_list, 72);
573 mutex_unlock(&kvm->lock);
574 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
575 r ? "(not available)" : "(success)");
577 case KVM_CAP_S390_GS:
579 mutex_lock(&kvm->lock);
/*
 * NOTE(review): this case tests online_vcpus while the VECTOR_REGISTERS,
 * RI and AIS cases above test kvm->created_vcpus - confirm whether the
 * difference is intentional.
 */
580 if (atomic_read(&kvm->online_vcpus)) {
582 } else if (test_facility(133)) {
583 set_kvm_facility(kvm->arch.model.fac_mask, 133);
584 set_kvm_facility(kvm->arch.model.fac_list, 133);
587 mutex_unlock(&kvm->lock);
588 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
589 r ? "(not available)" : "(success)");
591 case KVM_CAP_S390_USER_STSI:
592 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
593 kvm->arch.user_stsi = 1;
596 case KVM_CAP_S390_USER_INSTR0:
597 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
598 kvm->arch.user_instr0 = 1;
599 icpt_operexc_on_all_vcpus(kvm);
/*
 * Read a KVM_S390_VM_MEM_CTRL attribute. For KVM_S390_VM_MEM_LIMIT_SIZE the
 * current kvm->arch.mem_limit is logged and written to the u64 that
 * attr->addr points to in user space.
 */
609 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
613 switch (attr->attr) {
614 case KVM_S390_VM_MEM_LIMIT_SIZE:
616 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
617 kvm->arch.mem_limit);
618 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
/*
 * Write a KVM_S390_VM_MEM_CTRL attribute.
 *
 *  ENABLE_CMMA: sets kvm->arch.use_cmma under kvm->lock, only while no
 *               vCPU has been created.
 *  CLR_CMMA:    calls s390_reset_cmma() on the gmap's mm under kvm->lock
 *               and an SRCU read section.
 *  LIMIT_SIZE:  reads a new limit from user space and, while no vCPU has
 *               been created, replaces kvm->arch.gmap with a gmap created
 *               for that limit.
 */
628 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
632 switch (attr->attr) {
633 case KVM_S390_VM_MEM_ENABLE_CMMA:
639 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
640 mutex_lock(&kvm->lock);
641 if (!kvm->created_vcpus) {
642 kvm->arch.use_cmma = 1;
645 mutex_unlock(&kvm->lock);
647 case KVM_S390_VM_MEM_CLR_CMMA:
/* NOTE(review): use_cmma is read here before kvm->lock is taken below */
652 if (!kvm->arch.use_cmma)
655 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
656 mutex_lock(&kvm->lock);
657 idx = srcu_read_lock(&kvm->srcu);
658 s390_reset_cmma(kvm->arch.gmap->mm);
659 srcu_read_unlock(&kvm->srcu, idx);
660 mutex_unlock(&kvm->lock);
663 case KVM_S390_VM_MEM_LIMIT_SIZE: {
664 unsigned long new_limit;
666 if (kvm_is_ucontrol(kvm))
669 if (get_user(new_limit, (u64 __user *)attr->addr))
/* a limit that is already in place may not be raised */
672 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
673 new_limit > kvm->arch.mem_limit)
679 /* gmap_create takes last usable address */
680 if (new_limit != KVM_S390_NO_MEM_LIMIT)
684 mutex_lock(&kvm->lock);
685 if (!kvm->created_vcpus) {
686 /* gmap_create will round the limit up */
687 struct gmap *new = gmap_create(current->mm, new_limit);
/* the old gmap is removed before the new one is installed */
692 gmap_remove(kvm->arch.gmap);
694 kvm->arch.gmap = new;
698 mutex_unlock(&kvm->lock);
/* NOTE(review): kvm->arch.gmap->asce is read for logging after the unlock */
699 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
700 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
701 (void *) kvm->arch.gmap->asce);
711 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
/*
 * Enable or disable AES/DEA key wrapping for the VM.
 *
 * Requires guest facility 76. Under kvm->lock the selected attribute either
 * sets the aes_kw/dea_kw flag (the ENABLE variants also operate on the
 * matching wrapping key mask in the crycb) or clears the flag and zeroes
 * the mask. Afterwards kvm_s390_vcpu_crypto_setup() is run for every vCPU.
 */
713 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
715 struct kvm_vcpu *vcpu;
718 if (!test_kvm_facility(kvm, 76))
721 mutex_lock(&kvm->lock);
722 switch (attr->attr) {
723 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
725 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
726 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
727 kvm->arch.crypto.aes_kw = 1;
728 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
730 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
732 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
733 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
734 kvm->arch.crypto.dea_kw = 1;
735 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
/* disabling clears the flag and wipes the corresponding key mask */
737 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
738 kvm->arch.crypto.aes_kw = 0;
739 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
740 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
741 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
743 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
744 kvm->arch.crypto.dea_kw = 0;
745 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
746 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
747 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
750 mutex_unlock(&kvm->lock);
/* push the new crypto settings to every vCPU */
754 kvm_for_each_vcpu(i, vcpu, kvm) {
755 kvm_s390_vcpu_crypto_setup(vcpu);
758 mutex_unlock(&kvm->lock);
/* Issue the synchronous request @req to every vCPU of @kvm. */
762 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
765 struct kvm_vcpu *vcpu;
767 kvm_for_each_vcpu(cx, vcpu, kvm)
768 kvm_s390_sync_request(req, vcpu);
772 * Must be called with kvm->srcu held to avoid races on memslots, and with
773 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
/*
 * Switch the VM into migration mode.
 *
 * A zeroed migration state is allocated and published in
 * kvm->arch.migration_state. When CMMA is in use, a bitmap covering all
 * guest pages up to the end of the last memslot is vmalloc'ed, every page
 * belonging to a used slot is marked in it, and all vCPUs receive
 * KVM_REQ_START_MIGRATION.
 */
775 static int kvm_s390_vm_start_migration(struct kvm *kvm)
777 struct kvm_s390_migration_state *mgs;
778 struct kvm_memory_slot *ms;
779 /* should be the only one */
780 struct kvm_memslots *slots;
781 unsigned long ram_pages;
784 /* migration mode already enabled */
785 if (kvm->arch.migration_state)
788 slots = kvm_memslots(kvm);
789 if (!slots || !slots->used_slots)
792 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
795 kvm->arch.migration_state = mgs;
797 if (kvm->arch.use_cmma) {
799 * Get the last slot. They should be sorted by base_gfn, so the
800 * last slot is also the one at the end of the address space.
801 * We have verified above that at least one slot is present.
803 ms = slots->memslots + slots->used_slots - 1;
804 /* round up so we only use full longs */
805 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
806 /* allocate enough bytes to store all the bits */
807 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
808 if (!mgs->pgste_bitmap) {
/* undo the publication done above when the bitmap cannot be allocated */
810 kvm->arch.migration_state = NULL;
/* dirty_pages starts at ram_pages, the rounded-up bitmap size */
814 mgs->bitmap_size = ram_pages;
815 atomic64_set(&mgs->dirty_pages, ram_pages);
816 /* mark all the pages in active slots as dirty */
817 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
818 ms = slots->memslots + slotnr;
819 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
822 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
828 * Must be called with kvm->lock to avoid races with ourselves and
829 * kvm_s390_vm_start_migration.
/*
 * Leave migration mode. kvm->arch.migration_state is cleared first, so the
 * state is unpublished before anything is torn down. With CMMA in use all
 * vCPUs get KVM_REQ_STOP_MIGRATION before the page bitmap is vfree'd.
 */
831 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
833 struct kvm_s390_migration_state *mgs;
835 /* migration mode already disabled */
836 if (!kvm->arch.migration_state)
838 mgs = kvm->arch.migration_state;
839 kvm->arch.migration_state = NULL;
841 if (kvm->arch.use_cmma) {
842 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
843 vfree(mgs->pgste_bitmap);
/*
 * Start or stop migration mode according to attr->attr, holding kvm->lock
 * for the whole operation. Starting additionally runs inside an SRCU read
 * section, as kvm_s390_vm_start_migration() requires. res starts out as
 * -ENXIO and is overwritten by the start/stop result.
 */
849 static int kvm_s390_vm_set_migration(struct kvm *kvm,
850 struct kvm_device_attr *attr)
852 int idx, res = -ENXIO;
854 mutex_lock(&kvm->lock);
855 switch (attr->attr) {
856 case KVM_S390_VM_MIGRATION_START:
857 idx = srcu_read_lock(&kvm->srcu);
858 res = kvm_s390_vm_start_migration(kvm);
859 srcu_read_unlock(&kvm->srcu, idx);
861 case KVM_S390_VM_MIGRATION_STOP:
862 res = kvm_s390_vm_stop_migration(kvm);
867 mutex_unlock(&kvm->lock);
/*
 * Report the migration status: a u64 that is 1 while
 * kvm->arch.migration_state is set and 0 otherwise is copied to attr->addr.
 * Only KVM_S390_VM_MIGRATION_STATUS is handled.
 */
872 static int kvm_s390_vm_get_migration(struct kvm *kvm,
873 struct kvm_device_attr *attr)
875 u64 mig = (kvm->arch.migration_state != NULL);
877 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
880 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
/*
 * Set the guest TOD clock from a struct kvm_s390_vm_tod_clock supplied by
 * user space. With guest facility 139 the extended setter is used;
 * otherwise only an epoch index of 0 is passed on to the plain setter.
 *
 * NOTE(review): ">od" below looks like a mangled "&gtod" (address of the
 * local variable) - confirm against the pristine source.
 */
885 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
887 struct kvm_s390_vm_tod_clock gtod;
889 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
892 if (test_kvm_facility(kvm, 139))
893 kvm_s390_set_tod_clock_ext(kvm, >od);
894 else if (gtod.epoch_idx == 0)
895 kvm_s390_set_tod_clock(kvm, gtod.tod);
899 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
900 gtod.epoch_idx, gtod.tod);
/*
 * Handle KVM_S390_VM_TOD_HIGH: read gtod_high from user space and log it.
 *
 * NOTE(review): ">od_high" below looks like a mangled "&gtod_high" -
 * confirm against the pristine source.
 */
905 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
909 if (copy_from_user(>od_high, (void __user *)attr->addr,
915 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
/*
 * Handle KVM_S390_VM_TOD_LOW: read the TOD base value from user space, apply
 * it with kvm_s390_set_tod_clock() and log it.
 *
 * NOTE(review): ">od" below looks like a mangled "&gtod" - confirm against
 * the pristine source.
 */
920 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
924 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
927 kvm_s390_set_tod_clock(kvm, gtod);
928 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
/*
 * Dispatch a KVM_S390_VM_TOD write to the EXT, HIGH or LOW setter according
 * to attr->attr; ret carries the setter's result.
 */
932 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
939 switch (attr->attr) {
940 case KVM_S390_VM_TOD_EXT:
941 ret = kvm_s390_set_tod_ext(kvm, attr);
943 case KVM_S390_VM_TOD_HIGH:
944 ret = kvm_s390_set_tod_high(kvm, attr);
946 case KVM_S390_VM_TOD_LOW:
947 ret = kvm_s390_set_tod_low(kvm, attr);
/*
 * Compute the guest's extended TOD clock into @gtod: the host value from
 * get_tod_clock_ext() plus the VM's epoch (tod) and epoch index (epdx).
 */
956 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
957 struct kvm_s390_vm_tod_clock *gtod)
959 struct kvm_s390_tod_clock_ext htod;
963 get_tod_clock_ext((char *)&htod);
965 gtod->tod = htod.tod + kvm->arch.epoch;
966 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
/* a sum smaller than the host value means the addition wrapped: carry into the index */
968 if (gtod->tod < htod.tod)
969 gtod->epoch_idx += 1;
/*
 * Handle a KVM_S390_VM_TOD_EXT read: fill a zeroed struct
 * kvm_s390_vm_tod_clock (extended variant with guest facility 139, otherwise
 * only .tod from the fast getter), copy it to attr->addr and log it.
 *
 * NOTE(review): ">od" below looks like a mangled "&gtod" - confirm against
 * the pristine source.
 */
974 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
976 struct kvm_s390_vm_tod_clock gtod;
978 memset(>od, 0, sizeof(gtod));
980 if (test_kvm_facility(kvm, 139))
981 kvm_s390_get_tod_clock_ext(kvm, >od);
983 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
985 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
988 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
989 gtod.epoch_idx, gtod.tod);
/*
 * Handle a KVM_S390_VM_TOD_HIGH read: copy gtod_high to attr->addr and log
 * it.
 *
 * NOTE(review): ">od_high" below looks like a mangled "&gtod_high" -
 * confirm against the pristine source.
 */
993 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
997 if (copy_to_user((void __user *)attr->addr, >od_high,
1000 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
/*
 * Handle a KVM_S390_VM_TOD_LOW read: fetch the guest TOD via
 * kvm_s390_get_tod_clock_fast(), copy it to attr->addr and log it.
 *
 * NOTE(review): ">od" below looks like a mangled "&gtod" - confirm against
 * the pristine source.
 */
1005 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1009 gtod = kvm_s390_get_tod_clock_fast(kvm);
1010 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
1012 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
/*
 * Dispatch a KVM_S390_VM_TOD read to the EXT, HIGH or LOW getter according
 * to attr->attr; ret carries the getter's result.
 */
1017 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1024 switch (attr->attr) {
1025 case KVM_S390_VM_TOD_EXT:
1026 ret = kvm_s390_get_tod_ext(kvm, attr);
1028 case KVM_S390_VM_TOD_HIGH:
1029 ret = kvm_s390_get_tod_high(kvm, attr);
1031 case KVM_S390_VM_TOD_LOW:
1032 ret = kvm_s390_get_tod_low(kvm, attr);
/*
 * Configure the guest CPU model (cpuid, IBC, facility list) from a
 * struct kvm_s390_vm_cpu_processor supplied by user space.
 *
 * Runs under kvm->lock and checks kvm->created_vcpus first. The requested
 * IBC is clamped into the machine's [lowest_ibc, unblocked_ibc] range, both
 * taken from sclp.ibc; clamping only happens when the machine reports a
 * lowest IBC and user space requested a non-zero one.
 */
1041 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1043 struct kvm_s390_vm_cpu_processor *proc;
1044 u16 lowest_ibc, unblocked_ibc;
1047 mutex_lock(&kvm->lock);
1048 if (kvm->created_vcpus) {
1052 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1057 if (!copy_from_user(proc, (void __user *)attr->addr,
1059 kvm->arch.model.cpuid = proc->cpuid;
/* ">>" binds tighter than "&": bits 16..27 hold the lowest IBC, bits 0..11 the unblocked IBC */
1060 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1061 unblocked_ibc = sclp.ibc & 0xfff;
1062 if (lowest_ibc && proc->ibc) {
1063 if (proc->ibc > unblocked_ibc)
1064 kvm->arch.model.ibc = unblocked_ibc;
1065 else if (proc->ibc < lowest_ibc)
1066 kvm->arch.model.ibc = lowest_ibc;
1068 kvm->arch.model.ibc = proc->ibc;
1070 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1071 S390_ARCH_FAC_LIST_SIZE_BYTE);
1072 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1073 kvm->arch.model.ibc,
1074 kvm->arch.model.cpuid);
1075 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1076 kvm->arch.model.fac_list[0],
1077 kvm->arch.model.fac_list[1],
1078 kvm->arch.model.fac_list[2]);
1083 mutex_unlock(&kvm->lock);
/*
 * Set the guest CPU feature bitmap from user space. The requested set must
 * be a subset of kvm_s390_available_cpu_feat; it is copied into
 * kvm->arch.cpu_feat under kvm->lock.
 */
1087 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1088 struct kvm_device_attr *attr)
1090 struct kvm_s390_vm_cpu_feat data;
1093 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1095 if (!bitmap_subset((unsigned long *) data.feat,
1096 kvm_s390_available_cpu_feat,
1097 KVM_S390_VM_CPU_FEAT_NR_BITS))
1100 mutex_lock(&kvm->lock);
/*
 * NOTE(review): this tests online_vcpus, whereas kvm_s390_set_processor()
 * tests kvm->created_vcpus - confirm whether the difference is intentional.
 */
1101 if (!atomic_read(&kvm->online_vcpus)) {
1102 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1103 KVM_S390_VM_CPU_FEAT_NR_BITS);
1106 mutex_unlock(&kvm->lock);
/*
 * Placeholder for configuring guest subfunctions; per the remark below the
 * values are not stored yet. kvm_s390_vm_has_attr() likewise marks this
 * attribute as not supported yet.
 */
1110 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1111 struct kvm_device_attr *attr)
1114 * Once supported by kernel + hw, we have to store the subfunctions
1115 * in kvm->arch and remember that user space configured them.
/*
 * Dispatch a KVM_S390_VM_CPU_MODEL write to the PROCESSOR, PROCESSOR_FEAT or
 * PROCESSOR_SUBFUNC setter according to attr->attr.
 */
1120 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1124 switch (attr->attr) {
1125 case KVM_S390_VM_CPU_PROCESSOR:
1126 ret = kvm_s390_set_processor(kvm, attr);
1128 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1129 ret = kvm_s390_set_processor_feat(kvm, attr);
1131 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1132 ret = kvm_s390_set_processor_subfunc(kvm, attr);
/*
 * Report the configured guest CPU model: cpuid, ibc and facility list are
 * copied from kvm->arch.model into a zero-allocated
 * struct kvm_s390_vm_cpu_processor, logged, and copied to attr->addr.
 */
1138 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1140 struct kvm_s390_vm_cpu_processor *proc;
1143 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1148 proc->cpuid = kvm->arch.model.cpuid;
1149 proc->ibc = kvm->arch.model.ibc;
1150 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1151 S390_ARCH_FAC_LIST_SIZE_BYTE);
1152 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1153 kvm->arch.model.ibc,
1154 kvm->arch.model.cpuid);
1155 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1156 kvm->arch.model.fac_list[0],
1157 kvm->arch.model.fac_list[1],
1158 kvm->arch.model.fac_list[2]);
1159 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
/*
 * Report the host machine model in a zero-allocated
 * struct kvm_s390_vm_cpu_machine: the host cpuid from get_cpu_id(), sclp.ibc,
 * the VM's facility mask and the host facility list from
 * S390_lowcore.stfle_fac_list. The result is copied to attr->addr.
 */
1166 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1168 struct kvm_s390_vm_cpu_machine *mach;
1171 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1176 get_cpu_id((struct cpuid *) &mach->cpuid);
1177 mach->ibc = sclp.ibc;
1178 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1179 S390_ARCH_FAC_LIST_SIZE_BYTE);
1180 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1181 sizeof(S390_lowcore.stfle_fac_list));
/*
 * NOTE(review): the message is labelled "host", but the arguments are the
 * guest model's ibc and cpuid rather than mach->ibc / mach->cpuid - confirm.
 */
1182 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1183 kvm->arch.model.ibc,
1184 kvm->arch.model.cpuid);
1185 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1189 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1193 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
/*
 * Copy the VM's configured CPU feature bitmap (kvm->arch.cpu_feat) to the
 * struct kvm_s390_vm_cpu_feat at attr->addr.
 */
1200 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1201 struct kvm_device_attr *attr)
1203 struct kvm_s390_vm_cpu_feat data;
1205 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1206 KVM_S390_VM_CPU_FEAT_NR_BITS);
1207 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
/*
 * Copy the bitmap of CPU features KVM can offer on this host
 * (kvm_s390_available_cpu_feat) to the struct kvm_s390_vm_cpu_feat at
 * attr->addr.
 */
1212 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1213 struct kvm_device_attr *attr)
1215 struct kvm_s390_vm_cpu_feat data;
1217 bitmap_copy((unsigned long *) data.feat,
1218 kvm_s390_available_cpu_feat,
1219 KVM_S390_VM_CPU_FEAT_NR_BITS);
1220 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
/*
 * Placeholder for reading guest-configured subfunctions; per the remark
 * below, configuring them is not possible yet.
 */
1225 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1226 struct kvm_device_attr *attr)
1229 * Once we can actually configure subfunctions (kernel + hw support),
1230 * we have to check if they were already set by user space, if so copy
1231 * them from kvm->arch.
/*
 * Copy the host's available subfunction masks
 * (kvm_s390_available_subfunc, filled by kvm_s390_cpu_feat_init()) to
 * attr->addr.
 */
1236 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1237 struct kvm_device_attr *attr)
1239 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1240 sizeof(struct kvm_s390_vm_cpu_subfunc)))
/*
 * Dispatch a KVM_S390_VM_CPU_MODEL read according to attr->attr. The
 * PROCESSOR* attributes report the guest configuration, the MACHINE*
 * attributes what the host offers.
 */
1244 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1248 switch (attr->attr) {
1249 case KVM_S390_VM_CPU_PROCESSOR:
1250 ret = kvm_s390_get_processor(kvm, attr);
1252 case KVM_S390_VM_CPU_MACHINE:
1253 ret = kvm_s390_get_machine(kvm, attr);
1255 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1256 ret = kvm_s390_get_processor_feat(kvm, attr);
1258 case KVM_S390_VM_CPU_MACHINE_FEAT:
1259 ret = kvm_s390_get_machine_feat(kvm, attr);
1261 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1262 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1264 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1265 ret = kvm_s390_get_machine_subfunc(kvm, attr);
/*
 * Top-level dispatcher for setting a VM device attribute: routes by
 * attr->group to the memory-control, TOD, CPU-model, crypto or migration
 * setter and stores that handler's result in ret.
 */
1271 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1275 switch (attr->group) {
1276 case KVM_S390_VM_MEM_CTRL:
1277 ret = kvm_s390_set_mem_control(kvm, attr);
1279 case KVM_S390_VM_TOD:
1280 ret = kvm_s390_set_tod(kvm, attr);
1282 case KVM_S390_VM_CPU_MODEL:
1283 ret = kvm_s390_set_cpu_model(kvm, attr);
1285 case KVM_S390_VM_CRYPTO:
1286 ret = kvm_s390_vm_set_crypto(kvm, attr);
1288 case KVM_S390_VM_MIGRATION:
1289 ret = kvm_s390_vm_set_migration(kvm, attr);
/*
 * Route a VM-level "get attribute" request to the handler for attr->group:
 * memory control, TOD clock, CPU model or migration. Unlike the setter,
 * no KVM_S390_VM_CRYPTO case is listed here.
 */
1299 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1303 switch (attr->group) {
1304 case KVM_S390_VM_MEM_CTRL:
1305 ret = kvm_s390_get_mem_control(kvm, attr);
1307 case KVM_S390_VM_TOD:
1308 ret = kvm_s390_get_tod(kvm, attr);
1310 case KVM_S390_VM_CPU_MODEL:
1311 ret = kvm_s390_get_cpu_model(kvm, attr);
1313 case KVM_S390_VM_MIGRATION:
1314 ret = kvm_s390_vm_get_migration(kvm, attr);
/*
 * Tell user space whether a (group, attribute) pair is known. The outer
 * switch selects on attr->group, the nested switches on attr->attr.
 * The two CMMA memory-control attributes are only reported as present
 * when sclp.has_cmma is set; otherwise the result is -ENXIO.
 */
1324 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1328 switch (attr->group) {
1329 case KVM_S390_VM_MEM_CTRL:
1330 switch (attr->attr) {
1331 case KVM_S390_VM_MEM_ENABLE_CMMA:
1332 case KVM_S390_VM_MEM_CLR_CMMA:
1333 ret = sclp.has_cmma ? 0 : -ENXIO;
1335 case KVM_S390_VM_MEM_LIMIT_SIZE:
1343 case KVM_S390_VM_TOD:
1344 switch (attr->attr) {
1345 case KVM_S390_VM_TOD_LOW:
1346 case KVM_S390_VM_TOD_HIGH:
1354 case KVM_S390_VM_CPU_MODEL:
1355 switch (attr->attr) {
1356 case KVM_S390_VM_CPU_PROCESSOR:
1357 case KVM_S390_VM_CPU_MACHINE:
1358 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1359 case KVM_S390_VM_CPU_MACHINE_FEAT:
1360 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1363 /* configuring subfunctions is not supported yet */
1364 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1370 case KVM_S390_VM_CRYPTO:
1371 switch (attr->attr) {
1372 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1373 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1374 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1375 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1383 case KVM_S390_VM_MIGRATION:
/*
 * Read the storage keys of args->count guest frames, starting at
 * args->start_gfn, into a temporary array and copy that array to the user
 * buffer at args->skeydata_addr.
 *
 * args->flags and args->count (1..KVM_S390_SKEYS_MAX) are checked up
 * front. If the calling mm does not use storage keys,
 * KVM_S390_GET_SKEYS_NONE is returned without touching the buffer.
 * The per-frame walk translates each gfn to a host virtual address and
 * runs with the mm's mmap_sem held for read inside a kvm->srcu read-side
 * section.
 *
 * NOTE(review): "¤t" in the down_read()/up_read() calls below looks like
 * a mis-decoded "&current" - confirm against the pristine file.
 */
1394 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1398 int srcu_idx, i, r = 0;
1400 if (args->flags != 0)
1403 /* Is this guest using storage keys? */
1404 if (!mm_use_skey(current->mm))
1405 return KVM_S390_GET_SKEYS_NONE;
1407 /* Enforce sane limit on memory allocation */
1408 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1411 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1415 down_read(¤t->mm->mmap_sem);
1416 srcu_idx = srcu_read_lock(&kvm->srcu);
1417 for (i = 0; i < args->count; i++) {
1418 hva = gfn_to_hva(kvm, args->start_gfn + i);
1419 if (kvm_is_error_hva(hva)) {
1424 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1428 srcu_read_unlock(&kvm->srcu, srcu_idx);
1429 up_read(¤t->mm->mmap_sem);
1432 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1433 sizeof(uint8_t) * args->count);
/*
 * Set the storage keys of args->count guest frames, starting at
 * args->start_gfn, from the user buffer at args->skeydata_addr.
 *
 * args->flags and args->count (1..KVM_S390_SKEYS_MAX) are checked up
 * front. The keys are first copied into a kernel array, then storage key
 * handling is enabled via s390_enable_skey(), and finally each key is
 * applied with set_guest_storage_key(). A key with bit 0x01 set is treated
 * specially because that bit is reserved. The walk runs with the mm's
 * mmap_sem held for read inside a kvm->srcu read-side section.
 *
 * NOTE(review): "¤t" in the down_read()/up_read() calls below looks like
 * a mis-decoded "&current" - confirm against the pristine file.
 */
1442 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1446 int srcu_idx, i, r = 0;
1448 if (args->flags != 0)
1451 /* Enforce sane limit on memory allocation */
1452 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1455 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1459 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1460 sizeof(uint8_t) * args->count);
1466 /* Enable storage key handling for the guest */
1467 r = s390_enable_skey();
1471 down_read(¤t->mm->mmap_sem);
1472 srcu_idx = srcu_read_lock(&kvm->srcu);
1473 for (i = 0; i < args->count; i++) {
1474 hva = gfn_to_hva(kvm, args->start_gfn + i);
1475 if (kvm_is_error_hva(hva)) {
1480 /* Lowest order bit is reserved */
1481 if (keys[i] & 0x01) {
1486 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1490 srcu_read_unlock(&kvm->srcu, srcu_idx);
1491 up_read(¤t->mm->mmap_sem);
1498 * Base address and length must be sent at the start of each block, therefore
1499 * it's cheaper to send some clean data, as long as it's less than the size of
/*
 * Largest gap, in pages, between two dirty bits that
 * kvm_s390_get_cmma_bits() still bridges within one result block:
 * the size of two pointers.
 */
1502 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1503 /* for consistency */
/* Upper bound on a CMMA transfer; reuses the storage-key limit as a u32. */
1504 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1507 * This function searches for the next page with dirty CMMA attributes, and
1508 * saves the attributes in the buffer up to either the end of the buffer or
1509 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1510 * no trailing clean bytes are saved.
1511 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1512 * output buffer will indicate 0 as length.
/*
 * Collect CMMA values for a run of guest pages into a temporary buffer and
 * copy them to user space at args->values.
 *
 * - args->flags may contain nothing but KVM_S390_CMMA_PEEK.
 * - The buffer size is args->count capped at KVM_S390_CMMA_SIZE_MAX.
 * - Several early exits zero *args completely to signal "nothing to
 *   report" (empty buffer, CMMA unused by the mm, no dirty pages, no bit
 *   found in the bitmap).
 * - In the non-peek case the first page is located through the migration
 *   state's pgste_bitmap (retrying once from bit 0), and every visited
 *   page whose bit was set has it cleared and s->dirty_pages decremented.
 * - args->start_gfn is rewritten to the first page reported and
 *   args->remaining to the current dirty-page count (0 if there is no
 *   migration state).
 *
 * The page walk runs with kvm->mm's mmap_sem held for read inside a
 * kvm->srcu read-side section.
 */
1514 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1515 struct kvm_s390_cmma_log *args)
1517 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1518 unsigned long bufsize, hva, pgstev, i, next, cur;
1519 int srcu_idx, peek, r = 0, rr;
1522 cur = args->start_gfn;
1523 i = next = pgstev = 0;
1525 if (unlikely(!kvm->arch.use_cmma))
1527 /* Invalid/unsupported flags were specified */
1528 if (args->flags & ~KVM_S390_CMMA_PEEK)
1530 /* Migration mode query, and we are not doing a migration */
1531 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1534 /* CMMA is disabled or was not used, or the buffer has length zero */
1535 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1536 if (!bufsize || !kvm->mm->context.use_cmma) {
1537 memset(args, 0, sizeof(*args));
1542 /* We are not peeking, and there are no dirty pages */
1543 if (!atomic64_read(&s->dirty_pages)) {
1544 memset(args, 0, sizeof(*args));
1547 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1549 if (cur >= s->bitmap_size) /* nothing found, loop back */
1550 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1551 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1552 memset(args, 0, sizeof(*args));
1555 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1558 res = vmalloc(bufsize);
1562 args->start_gfn = cur;
1564 down_read(&kvm->mm->mmap_sem);
1565 srcu_idx = srcu_read_lock(&kvm->srcu);
1566 while (i < bufsize) {
1567 hva = gfn_to_hva(kvm, cur);
1568 if (kvm_is_error_hva(hva)) {
1572 /* decrement only if we actually flipped the bit to 0 */
1573 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1574 atomic64_dec(&s->dirty_pages);
1575 r = get_pgste(kvm->mm, hva, &pgstev);
1578 /* save the value */
/* only the bits selected by 0x43 of PGSTE bits 24..31 are reported */
1579 res[i++] = (pgstev >> 24) & 0x43;
1581 * if the next bit is too far away, stop.
1582 * if we reached the previous "next", find the next one
1585 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1588 next = find_next_bit(s->pgste_bitmap,
1589 s->bitmap_size, cur + 1);
1590 /* reached the end of the bitmap or of the buffer, stop */
1591 if ((next >= s->bitmap_size) ||
1592 (next >= args->start_gfn + bufsize))
1597 srcu_read_unlock(&kvm->srcu, srcu_idx);
1598 up_read(&kvm->mm->mmap_sem);
1600 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1602 rr = copy_to_user((void __user *)args->values, res, args->count);
1611 * This function sets the CMMA attributes for the given pages. If the input
1612 * buffer has zero length, no action is taken, otherwise the attributes are
1613 * set and the mm->context.use_cmma flag is set.
/*
 * Apply user-supplied CMMA values to args->count guest pages starting at
 * args->start_gfn.
 *
 * The request is checked against kvm->arch.use_cmma, a flags value of 0
 * and a count of at most KVM_S390_CMMA_SIZE_MAX; a count of 0 is handled
 * before any allocation happens. The values are copied from args->values
 * into a vmalloc'ed array. For each page the value is shifted into PGSTE
 * bits 24 and up and written with set_pgste_bits(), using a mask that is
 * restricted to _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT. The walk runs
 * with kvm->mm's mmap_sem held for read inside a kvm->srcu read-side
 * section.
 *
 * Afterwards mm->context.use_cmma is switched on, under mmap_sem held for
 * write, if it was not set already.
 */
1615 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1616 const struct kvm_s390_cmma_log *args)
1618 unsigned long hva, mask, pgstev, i;
1620 int srcu_idx, r = 0;
1624 if (!kvm->arch.use_cmma)
1626 /* invalid/unsupported flags */
1627 if (args->flags != 0)
1629 /* Enforce sane limit on memory allocation */
1630 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1633 if (args->count == 0)
1636 bits = vmalloc(sizeof(*bits) * args->count);
1640 r = copy_from_user(bits, (void __user *)args->values, args->count);
1646 down_read(&kvm->mm->mmap_sem);
1647 srcu_idx = srcu_read_lock(&kvm->srcu);
1648 for (i = 0; i < args->count; i++) {
1649 hva = gfn_to_hva(kvm, args->start_gfn + i);
1650 if (kvm_is_error_hva(hva)) {
1656 pgstev = pgstev << 24;
1657 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1658 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1660 srcu_read_unlock(&kvm->srcu, srcu_idx);
1661 up_read(&kvm->mm->mmap_sem);
1663 if (!kvm->mm->context.use_cmma) {
1664 down_write(&kvm->mm->mmap_sem);
1665 kvm->mm->context.use_cmma = 1;
1666 up_write(&kvm->mm->mmap_sem);
/*
 * s390 VM ioctl handler. The VM is taken from filp->private_data and arg
 * is treated as a user pointer. Each case copies its argument structure
 * in from user space and forwards it to the matching helper:
 * interrupt injection, capability enabling, irqchip creation, the
 * set/get/has device-attribute calls, storage-key get/set and CMMA
 * get/set. KVM_S390_GET_CMMA_BITS also copies the (updated) argument
 * structure back to user space.
 */
1673 long kvm_arch_vm_ioctl(struct file *filp,
1674 unsigned int ioctl, unsigned long arg)
1676 struct kvm *kvm = filp->private_data;
1677 void __user *argp = (void __user *)arg;
1678 struct kvm_device_attr attr;
1682 case KVM_S390_INTERRUPT: {
1683 struct kvm_s390_interrupt s390int;
1686 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1688 r = kvm_s390_inject_vm(kvm, &s390int);
1691 case KVM_ENABLE_CAP: {
1692 struct kvm_enable_cap cap;
1694 if (copy_from_user(&cap, argp, sizeof(cap)))
1696 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1699 case KVM_CREATE_IRQCHIP: {
1700 struct kvm_irq_routing_entry routing;
1703 if (kvm->arch.use_irqchip) {
1704 /* Set up dummy routing. */
/* zeroed entry, passed with both trailing arguments set to 0 */
1705 memset(&routing, 0, sizeof(routing));
1706 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1710 case KVM_SET_DEVICE_ATTR: {
1712 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1714 r = kvm_s390_vm_set_attr(kvm, &attr);
1717 case KVM_GET_DEVICE_ATTR: {
1719 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1721 r = kvm_s390_vm_get_attr(kvm, &attr);
1724 case KVM_HAS_DEVICE_ATTR: {
1726 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1728 r = kvm_s390_vm_has_attr(kvm, &attr);
1731 case KVM_S390_GET_SKEYS: {
1732 struct kvm_s390_skeys args;
1735 if (copy_from_user(&args, argp,
1736 sizeof(struct kvm_s390_skeys)))
1738 r = kvm_s390_get_skeys(kvm, &args);
1741 case KVM_S390_SET_SKEYS: {
1742 struct kvm_s390_skeys args;
1745 if (copy_from_user(&args, argp,
1746 sizeof(struct kvm_s390_skeys)))
1748 r = kvm_s390_set_skeys(kvm, &args);
1751 case KVM_S390_GET_CMMA_BITS: {
1752 struct kvm_s390_cmma_log args;
1755 if (copy_from_user(&args, argp, sizeof(args)))
1757 r = kvm_s390_get_cmma_bits(kvm, &args);
1759 r = copy_to_user(argp, &args, sizeof(args));
1765 case KVM_S390_SET_CMMA_BITS: {
1766 struct kvm_s390_cmma_log args;
1769 if (copy_from_user(&args, argp, sizeof(args)))
1771 r = kvm_s390_set_cmma_bits(kvm, &args);
/*
 * Zero the 128-byte buffer at config and issue PQAP(QCI), emitted as the
 * raw opcode 0xb2af0000, with function code 0x04000000. The inline
 * assembly takes fcn_code and config as register inputs and declares the
 * condition code, registers 0 and 2, and memory as clobbered.
 */
1781 static int kvm_s390_query_ap_config(u8 *config)
1783 u32 fcn_code = 0x04000000UL;
1786 memset(config, 0, 128);
1790 ".long 0xb2af0000\n" /* PQAP(QCI) */
1796 : "r" (fcn_code), "r" (config)
1797 : "cc", "0", "2", "memory"
/*
 * When facility 12 is present, query the AP configuration and report bit
 * 0x40 of the first configuration byte. A failing PQAP(QCI) condition
 * code is logged through pr_err().
 */
1803 static int kvm_s390_apxa_installed(void)
1808 if (test_facility(12)) {
1809 cc = kvm_s390_query_ap_config(config);
1812 pr_err("PQAP(QCI) failed with cc=%d", cc);
1814 return config[0] & 0x40;
/*
 * Build the crycb designator: start from the crycb address truncated to
 * 32 bits and OR in a format flag. CRYCB_FORMAT2 is selected when
 * kvm_s390_apxa_installed() reports true; CRYCB_FORMAT1 is the other
 * format applied here.
 */
1820 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1822 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1824 if (kvm_s390_apxa_installed())
1825 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1827 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
/*
 * Produce the initial guest CPU id: the version field of the local cpuid
 * is forced to 0xff and the structure is returned reinterpreted as a u64.
 */
1830 static u64 kvm_s390_get_initial_cpuid(void)
1835 cpuid.version = 0xff;
1836 return *((u64 *) &cpuid);
/*
 * Set up the VM's crypto state, gated on KVM facility 76: point crycb at
 * the crycb embedded in sie_page2, compute the crycb designator, turn AES
 * and DEA key wrapping on and fill both wrapping key masks with random
 * bytes.
 */
1839 static void kvm_s390_crypto_init(struct kvm *kvm)
1841 if (!test_kvm_facility(kvm, 76))
1844 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1845 kvm_s390_set_crycb_format(kvm);
1847 /* Enable AES/DEA protected key functions by default */
1848 kvm->arch.crypto.aes_kw = 1;
1849 kvm->arch.crypto.dea_kw = 1;
1850 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1851 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1852 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1853 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
/*
 * Release the VM's SCA and clear the pointer. An extended SCA is freed
 * with free_pages_exact() using sizeof(struct esca_block); the
 * free_page() call handles the single-page case.
 */
1856 static void sca_dispose(struct kvm *kvm)
1858 if (kvm->arch.use_esca)
1859 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1861 free_page((unsigned long)(kvm->arch.sca));
1862 kvm->arch.sca = NULL;
/*
 * Architecture-specific part of VM creation. In order, the visible steps
 * are:
 *  - with CONFIG_KVM_S390_UCONTROL, check the type bits; a UCONTROL VM
 *    is tied to CAP_SYS_ADMIN;
 *  - enable SIE via s390_enable_sie();
 *  - allocate a zeroed page for the basic SCA (GFP_DMA is added when
 *    sclp.has_64bscao is not set) and place the SCA at the static
 *    sca_offset inside that page, serialized by kvm_lock;
 *  - register a debug feature area named "kvm-<pid>";
 *  - allocate sie_page2 from GFP_DMA memory;
 *  - derive the facility mask from the host STFLE list and the KVM mask,
 *    copy it into the facility list and force facilities 138 and 74 on
 *    (147 too when MACHINE_HAS_TLB_GUEST);
 *  - initialize cpuid, ibc, crypto, floating interrupt and ipte state;
 *  - for non-UCONTROL VMs, compute the memory limit and create the gmap.
 * The error path at the end frees sie_page2 and unregisters the debug
 * area.
 */
1865 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1867 gfp_t alloc_flags = GFP_KERNEL;
1869 char debug_name[16];
1870 static unsigned long sca_offset;
1873 #ifdef CONFIG_KVM_S390_UCONTROL
1874 if (type & ~KVM_VM_S390_UCONTROL)
1876 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1883 rc = s390_enable_sie();
1889 kvm->arch.use_esca = 0; /* start with basic SCA */
1890 if (!sclp.has_64bscao)
1891 alloc_flags |= GFP_DMA;
1892 rwlock_init(&kvm->arch.sca_lock);
1893 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1896 spin_lock(&kvm_lock);
1898 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1900 kvm->arch.sca = (struct bsca_block *)
1901 ((char *) kvm->arch.sca + sca_offset);
1902 spin_unlock(&kvm_lock);
1904 sprintf(debug_name, "kvm-%u", current->pid);
1906 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1910 kvm->arch.sie_page2 =
1911 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1912 if (!kvm->arch.sie_page2)
1915 /* Populate the facility mask initially. */
1916 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1917 sizeof(S390_lowcore.stfle_fac_list));
1918 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1919 if (i < kvm_s390_fac_list_mask_size())
1920 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1922 kvm->arch.model.fac_mask[i] = 0UL;
1925 /* Populate the facility list initially. */
1926 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1927 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1928 S390_ARCH_FAC_LIST_SIZE_BYTE);
1930 /* we are always in czam mode - even on pre z14 machines */
1931 set_kvm_facility(kvm->arch.model.fac_mask, 138);
1932 set_kvm_facility(kvm->arch.model.fac_list, 138);
1933 /* we emulate STHYI in kvm */
1934 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1935 set_kvm_facility(kvm->arch.model.fac_list, 74);
1936 if (MACHINE_HAS_TLB_GUEST) {
1937 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1938 set_kvm_facility(kvm->arch.model.fac_list, 147);
1941 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
/* only the low 12 bits of sclp.ibc are kept */
1942 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1944 kvm_s390_crypto_init(kvm);
1946 mutex_init(&kvm->arch.float_int.ais_lock);
1947 kvm->arch.float_int.simm = 0;
1948 kvm->arch.float_int.nimm = 0;
1949 spin_lock_init(&kvm->arch.float_int.lock);
1950 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1951 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1952 init_waitqueue_head(&kvm->arch.ipte_wq);
1953 mutex_init(&kvm->arch.ipte_mutex);
1955 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1956 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1958 if (type & KVM_VM_S390_UCONTROL) {
1959 kvm->arch.gmap = NULL;
1960 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1962 if (sclp.hamax == U64_MAX)
1963 kvm->arch.mem_limit = TASK_SIZE_MAX;
1965 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
/* gmap_create() is handed the last valid address, hence the - 1 */
1967 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1968 if (!kvm->arch.gmap)
1970 kvm->arch.gmap->private = kvm;
1971 kvm->arch.gmap->pfault_enabled = 0;
1974 kvm->arch.css_support = 0;
1975 kvm->arch.use_irqchip = 0;
1976 kvm->arch.epoch = 0;
1978 spin_lock_init(&kvm->arch.start_stop_lock);
1979 kvm_s390_vsie_init(kvm);
1980 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1984 free_page((unsigned long)kvm->arch.sie_page2);
1985 debug_unregister(kvm->arch.dbf);
1987 KVM_EVENT(3, "creation of vm failed: %d", rc);
/*
 * Takes no arguments and returns a bool.
 * NOTE(review): presumably tells common KVM code whether this
 * architecture provides per-vCPU debugfs entries - confirm.
 */
1991 bool kvm_arch_has_vcpu_debugfs(void)
/*
 * Takes the vCPU and returns an int status.
 * NOTE(review): presumably the hook for creating per-vCPU debugfs
 * entries - confirm.
 */
1996 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
/*
 * Tear down one vCPU: emit the event/trace records, clear its local
 * interrupts and async page-fault completion queue, remove the per-vCPU
 * gmap for ucontrol VMs, release the CMMA page when the VM uses CMMA,
 * free the page holding the SIE block and finally uninitialize the vcpu
 * and return it to kvm_vcpu_cache.
 */
2001 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2003 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2004 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2005 kvm_s390_clear_local_irqs(vcpu);
2006 kvm_clear_async_pf_completion_queue(vcpu);
2007 if (!kvm_is_ucontrol(vcpu->kvm))
2010 if (kvm_is_ucontrol(vcpu->kvm))
2011 gmap_remove(vcpu->arch.gmap);
2013 if (vcpu->kvm->arch.use_cmma)
2014 kvm_s390_vcpu_unsetup_cmma(vcpu);
2015 free_page((unsigned long)(vcpu->arch.sie_block));
2017 kvm_vcpu_uninit(vcpu);
2018 kmem_cache_free(kvm_vcpu_cache, vcpu);
/*
 * Destroy every vCPU of the VM, then, under kvm->lock, clear the
 * kvm->vcpus[] slots for all online vCPUs and reset online_vcpus to 0.
 */
2021 static void kvm_free_vcpus(struct kvm *kvm)
2024 struct kvm_vcpu *vcpu;
2026 kvm_for_each_vcpu(i, vcpu, kvm)
2027 kvm_arch_vcpu_destroy(vcpu);
2029 mutex_lock(&kvm->lock);
2030 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2031 kvm->vcpus[i] = NULL;
2033 atomic_set(&kvm->online_vcpus, 0);
2034 mutex_unlock(&kvm->lock);
/*
 * Architecture-specific VM teardown: free all vCPUs, unregister the debug
 * area, free sie_page2, remove the VM-wide gmap (non-ucontrol VMs only),
 * destroy adapters, floating interrupts and vsie state, and release the
 * migration state together with its pgste bitmap if one exists.
 */
2037 void kvm_arch_destroy_vm(struct kvm *kvm)
2039 kvm_free_vcpus(kvm);
2041 debug_unregister(kvm->arch.dbf);
2042 free_page((unsigned long)kvm->arch.sie_page2);
2043 if (!kvm_is_ucontrol(kvm))
2044 gmap_remove(kvm->arch.gmap);
2045 kvm_s390_destroy_adapters(kvm);
2046 kvm_s390_clear_float_irqs(kvm);
2047 kvm_s390_vsie_destroy(kvm);
2048 if (kvm->arch.migration_state) {
/* the bitmap came from vmalloc space, the state struct from kmalloc */
2049 vfree(kvm->arch.migration_state->pgste_bitmap);
2050 kfree(kvm->arch.migration_state);
2052 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2055 /* Section: vcpu related */
/*
 * Give a ucontrol vCPU its own gmap on top of current->mm, created with a
 * limit of -1UL, and link the gmap back to the owning VM through
 * ->private.
 */
2056 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2058 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2059 if (!vcpu->arch.gmap)
2061 vcpu->arch.gmap->private = vcpu->kvm;
/*
 * Remove a vCPU from the VM's SCA: clear its bit in mcn and zero the sda
 * field of its slot. Handles both the extended and the basic SCA layout
 * and runs under sca_lock taken for read. The work is gated on
 * kvm_s390_use_sca_entries().
 */
2066 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2068 if (!kvm_s390_use_sca_entries())
2070 read_lock(&vcpu->kvm->arch.sca_lock);
2071 if (vcpu->kvm->arch.use_esca) {
2072 struct esca_block *sca = vcpu->kvm->arch.sca;
2074 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2075 sca->cpu[vcpu->vcpu_id].sda = 0;
2077 struct bsca_block *sca = vcpu->kvm->arch.sca;
2079 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2080 sca->cpu[vcpu->vcpu_id].sda = 0;
2082 read_unlock(&vcpu->kvm->arch.sca_lock);
/*
 * Connect a vCPU to the VM's SCA. The SIE block always receives the SCA
 * address split into scaoh (upper 32 bits) and scaol (lower 32 bits).
 * When SCA entries are in use, the vCPU's slot additionally gets the SIE
 * block address in sda and its bit in mcn is set, all under sca_lock
 * taken for read. For the extended SCA the low six bits of scaol are
 * masked off and ECB2_ESCA is set in ecb2.
 */
2085 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2087 if (!kvm_s390_use_sca_entries()) {
2088 struct bsca_block *sca = vcpu->kvm->arch.sca;
2090 /* we still need the basic sca for the ipte control */
2091 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2092 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2094 read_lock(&vcpu->kvm->arch.sca_lock);
2095 if (vcpu->kvm->arch.use_esca) {
2096 struct esca_block *sca = vcpu->kvm->arch.sca;
2098 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2099 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2100 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2101 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2102 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2104 struct bsca_block *sca = vcpu->kvm->arch.sca;
2106 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2107 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2108 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2109 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2111 read_unlock(&vcpu->kvm->arch.sca_lock);
2114 /* Basic SCA to Extended SCA data copy routines */
/*
 * Copy one basic-SCA entry (s) into an extended-SCA entry (d); the lines
 * shown carry over the sigp_ctrl.c and sigp_ctrl.scn fields.
 */
2115 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2118 d->sigp_ctrl.c = s->sigp_ctrl.c;
2119 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
/*
 * Copy a basic SCA block (s) into an extended SCA block (d): the
 * ipte_control word plus each of the KVM_S390_BSCA_CPU_SLOTS per-CPU
 * entries via sca_copy_entry().
 */
2122 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2126 d->ipte_control = s->ipte_control;
2128 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2129 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
/*
 * Replace the VM's basic SCA with a freshly allocated, zeroed extended
 * SCA. With all vCPUs blocked and sca_lock held for write, the basic
 * SCA's contents are copied over, every vCPU's SIE block is repointed
 * (scaoh/scaol, with the low six address bits masked off, plus
 * ECB2_ESCA) and the VM is switched to the new block. Afterwards the
 * vCPUs are unblocked, the old SCA page is freed and the switch is
 * logged.
 */
2132 static int sca_switch_to_extended(struct kvm *kvm)
2134 struct bsca_block *old_sca = kvm->arch.sca;
2135 struct esca_block *new_sca;
2136 struct kvm_vcpu *vcpu;
2137 unsigned int vcpu_idx;
2140 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2144 scaoh = (u32)((u64)(new_sca) >> 32);
2145 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2147 kvm_s390_vcpu_block_all(kvm);
2148 write_lock(&kvm->arch.sca_lock);
2150 sca_copy_b_to_e(new_sca, old_sca);
2152 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2153 vcpu->arch.sie_block->scaoh = scaoh;
2154 vcpu->arch.sie_block->scaol = scaol;
2155 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2157 kvm->arch.sca = new_sca;
2158 kvm->arch.use_esca = 1;
2160 write_unlock(&kvm->arch.sca_lock);
2161 kvm_s390_vcpu_unblock_all(kvm);
2163 free_page((unsigned long)old_sca);
2165 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2166 old_sca, kvm->arch.sca);
/*
 * Decide whether a vCPU with the given id can be added to the VM.
 * Without SCA entries the id is compared against KVM_MAX_VCPUS. With SCA
 * entries, the id is first compared against KVM_S390_BSCA_CPU_SLOTS;
 * beyond that, both sclp.has_esca and sclp.has_64bscao are consulted and
 * the VM is switched to the extended SCA under kvm->lock unless it
 * already uses one. The final result is nonzero only if that switch
 * reported 0 and id < KVM_S390_ESCA_CPU_SLOTS.
 */
2170 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2174 if (!kvm_s390_use_sca_entries()) {
2175 if (id < KVM_MAX_VCPUS)
2179 if (id < KVM_S390_BSCA_CPU_SLOTS)
2181 if (!sclp.has_esca || !sclp.has_64bscao)
2184 mutex_lock(&kvm->lock);
2185 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2186 mutex_unlock(&kvm->lock);
2188 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
/*
 * Early per-vCPU initialization: invalidate the pfault token, clear the
 * async page-fault completion queue, set the prefix to 0 and build the
 * kvm_valid_regs mask advertised in vcpu->run. KVM_SYNC_RICCB is added
 * for facility 64 and KVM_SYNC_GSCB for facility 133; KVM_SYNC_VRS and
 * KVM_SYNC_FPRS are the two floating-point/vector register variants set
 * here. ucontrol VMs finish by creating the per-vCPU gmap.
 */
2191 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2193 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2194 kvm_clear_async_pf_completion_queue(vcpu);
2195 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2201 kvm_s390_set_prefix(vcpu, 0);
2202 if (test_kvm_facility(vcpu->kvm, 64))
2203 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2204 if (test_kvm_facility(vcpu->kvm, 133))
2205 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2206 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2207 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2210 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2212 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2214 if (kvm_is_ucontrol(vcpu->kvm))
2215 return __kvm_ucontrol_vcpu_init(vcpu);
2220 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Record the current TOD clock in cputm_start as the start of a CPU-timer
 * accounting interval. The store is wrapped in a cputm_seqcount write
 * section; a warning fires once if an interval was already open
 * (cputm_start != 0).
 */
2221 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2223 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2224 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2225 vcpu->arch.cputm_start = get_tod_clock_fast();
2226 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2229 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Close the current accounting interval: subtract the TOD time elapsed
 * since cputm_start from the SIE block's cputm and reset cputm_start to
 * 0, all inside a cputm_seqcount write section. A warning fires once if
 * no interval was open.
 */
2230 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2232 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2233 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2234 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2235 vcpu->arch.cputm_start = 0;
2236 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2239 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Mark CPU-timer accounting as enabled for this vCPU and open the first
 * interval. Warns once if accounting was already enabled.
 */
2240 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2242 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2243 vcpu->arch.cputm_enabled = true;
2244 __start_cpu_timer_accounting(vcpu);
2247 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Close the open accounting interval and mark CPU-timer accounting as
 * disabled. Warns once if accounting was not enabled.
 */
2248 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2250 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2251 __stop_cpu_timer_accounting(vcpu);
2252 vcpu->arch.cputm_enabled = false;
/*
 * Wrapper around __enable_cpu_timer_accounting() that disables preemption
 * before calling it.
 */
2255 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2257 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2258 __enable_cpu_timer_accounting(vcpu);
/*
 * Wrapper around __disable_cpu_timer_accounting() that disables
 * preemption before calling it.
 */
2262 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2264 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2265 __disable_cpu_timer_accounting(vcpu);
2269 /* set the cpu timer - may only be called from the VCPU thread itself */
/*
 * Store a new guest CPU timer value in the SIE block. With accounting
 * enabled, cputm_start is restarted at the current TOD clock so that the
 * new value is not charged with time from the previous interval. Both
 * stores happen with preemption disabled inside a cputm_seqcount write
 * section.
 */
2270 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2272 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2273 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2274 if (vcpu->arch.cputm_enabled)
2275 vcpu->arch.cputm_start = get_tod_clock_fast();
2276 vcpu->arch.sie_block->cputm = cputm;
2277 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2281 /* update and get the cpu timer - can also be called from other VCPU threads */
/*
 * Return the guest CPU timer as of now. With accounting disabled the
 * stored cputm is returned as is. Otherwise the stored value, reduced by
 * the TOD time elapsed since cputm_start (when an interval is open), is
 * read in a cputm_seqcount retry loop with preemption disabled. The
 * retry check is done against seq with its lowest bit cleared.
 */
2282 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2287 if (unlikely(!vcpu->arch.cputm_enabled))
2288 return vcpu->arch.sie_block->cputm;
2290 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2292 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2294 * If the writer would ever execute a read in the critical
2295 * section, e.g. in irq context, we have a deadlock.
2297 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2298 value = vcpu->arch.sie_block->cputm;
2299 /* if cputm_start is 0, accounting is being started/stopped */
2300 if (likely(vcpu->arch.cputm_start))
2301 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2302 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
/*
 * Load a vCPU: enable the gmap remembered in enabled_gmap, set
 * CPUSTAT_RUNNING in the SIE block's cpuflags and, if CPU-timer
 * accounting is enabled and the vCPU is not idle, open an accounting
 * interval.
 */
2307 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2310 gmap_enable(vcpu->arch.enabled_gmap);
2311 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2312 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2313 __start_cpu_timer_accounting(vcpu);
/*
 * Counterpart of kvm_arch_vcpu_load(): close the accounting interval (if
 * accounting is enabled and the vCPU is not idle), clear CPUSTAT_RUNNING,
 * remember the currently enabled gmap in enabled_gmap and disable it.
 */
2317 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2320 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2321 __stop_cpu_timer_accounting(vcpu);
2322 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2323 vcpu->arch.enabled_gmap = gmap_get_enabled();
2324 gmap_disable(vcpu->arch.enabled_gmap);
/*
 * Put the vCPU into its initial-reset state: zero PSW, prefix, CPU timer,
 * clock comparator, TOD programmable register and all 16 control
 * registers (then CR0 = 0xE0, CR14 = 0xC2000000), clear the fpc, set
 * gbea to 1, clear pp, invalidate the pfault token and drop pending
 * async page faults and local interrupts. Unless user space controls the
 * CPU state, the vCPU is also stopped.
 */
2328 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2330 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2331 vcpu->arch.sie_block->gpsw.mask = 0UL;
2332 vcpu->arch.sie_block->gpsw.addr = 0UL;
2333 kvm_s390_set_prefix(vcpu, 0);
2334 kvm_s390_set_cpu_timer(vcpu, 0);
2335 vcpu->arch.sie_block->ckc = 0UL;
2336 vcpu->arch.sie_block->todpr = 0;
2337 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2338 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2339 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2340 /* make sure the new fpc will be lazily loaded */
2342 current->thread.fpu.fpc = 0;
2343 vcpu->arch.sie_block->gbea = 1;
2344 vcpu->arch.sie_block->pp = 0;
2345 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2346 kvm_clear_async_pf_completion_queue(vcpu);
2347 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2348 kvm_s390_vcpu_stop(vcpu);
2349 kvm_s390_clear_local_irqs(vcpu);
/*
 * Final setup after vCPU creation: copy the VM's epoch into the SIE block
 * under kvm->lock, let non-ucontrol vCPUs share the VM-wide gmap, request
 * operation-exception interception (ICTL_OPEREXC) when facility 74 is
 * available or user_instr0 is set, and seed enabled_gmap for the first
 * vcpu_load.
 */
2352 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2354 mutex_lock(&vcpu->kvm->lock);
2356 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2358 mutex_unlock(&vcpu->kvm->lock);
2359 if (!kvm_is_ucontrol(vcpu->kvm)) {
2360 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2363 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2364 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2365 /* make vcpu_load load the right gmap on the first trigger */
2366 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
/*
 * Mirror the VM's crypto configuration into the vCPU's SIE block, gated
 * on KVM facility 76: ECB3_AES and ECB3_DEA are cleared and then set
 * again according to the VM's aes_kw/dea_kw settings, and the crycb
 * designator is copied over.
 */
2369 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2371 if (!test_kvm_facility(vcpu->kvm, 76))
2374 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2376 if (vcpu->kvm->arch.crypto.aes_kw)
2377 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2378 if (vcpu->kvm->arch.crypto.dea_kw)
2379 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2381 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
/* Free the page referenced by the SIE block's cbrlo and clear the field. */
2384 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2386 free_page(vcpu->arch.sie_block->cbrlo);
2387 vcpu->arch.sie_block->cbrlo = 0;
/*
 * Allocate a zeroed page and store its address in the SIE block's cbrlo;
 * a failed allocation is detected by the check that follows. ECB2_PFMFI
 * is cleared in ecb2.
 */
2390 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2392 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2393 if (!vcpu->arch.sie_block->cbrlo)
2396 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
/*
 * Apply the VM's CPU model to the vCPU: copy ibc into the SIE block and,
 * when KVM facility 7 is available, store the facility list address
 * (truncated to 32 bits) in the SIE block's fac field.
 */
2400 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2402 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2404 vcpu->arch.sie_block->ibc = model->ibc;
2405 if (test_kvm_facility(vcpu->kvm, 7))
2406 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
/*
 * Program the vCPU's SIE control block from the VM's facilities and the
 * host's capabilities:
 *  - cpuflags start from CPUSTAT_ZARCH (plus further flags), then
 *    CPUSTAT_GED2 (facility 78) or CPUSTAT_GED (facility 8) is added;
 *  - the CPU model (ibc, facility list) is applied;
 *  - ecb/ecb2/eca/ecd bits are set per facility or sclp capability, e.g.
 *    ECB_HOSTPROTINT for MACHINE_HAS_ESOP, ECB_TE for facility 73,
 *    ECA_VX and ECD_HOSTREGMGMT for facility 129;
 *  - sdnxo and riccbd are pointed at the matching areas in vcpu->run;
 *  - storage-key handling uses either CPUSTAT_KSS or interception of
 *    ISKE/SSKE/RRBE;
 *  - CMMA is set up when the VM uses it;
 *  - the clock-comparator hrtimer is initialized with
 *    kvm_s390_idle_wakeup() as callback;
 *  - the crypto control fields are filled in.
 */
2409 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2413 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2417 if (test_kvm_facility(vcpu->kvm, 78))
2418 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2419 else if (test_kvm_facility(vcpu->kvm, 8))
2420 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2422 kvm_s390_vcpu_setup_model(vcpu);
2424 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2425 if (MACHINE_HAS_ESOP)
2426 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2427 if (test_kvm_facility(vcpu->kvm, 9))
2428 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2429 if (test_kvm_facility(vcpu->kvm, 73))
2430 vcpu->arch.sie_block->ecb |= ECB_TE;
2432 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2433 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2434 if (test_kvm_facility(vcpu->kvm, 130))
2435 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2436 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2438 vcpu->arch.sie_block->eca |= ECA_CEI;
2440 vcpu->arch.sie_block->eca |= ECA_IB;
2442 vcpu->arch.sie_block->eca |= ECA_SII;
2443 if (sclp.has_sigpif)
2444 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2445 if (test_kvm_facility(vcpu->kvm, 129)) {
2446 vcpu->arch.sie_block->eca |= ECA_VX;
2447 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2449 if (test_kvm_facility(vcpu->kvm, 139))
2450 vcpu->arch.sie_block->ecd |= ECD_MEF;
2452 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2454 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2457 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2459 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2461 if (vcpu->kvm->arch.use_cmma) {
2462 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2466 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2467 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2469 kvm_s390_vcpu_crypto_setup(vcpu);
/*
 * Allocate and wire up a new vCPU. For non-ucontrol VMs the id must first
 * pass sca_can_add_vcpu(). The vcpu comes zeroed from kvm_vcpu_cache and
 * its SIE block lives in a separately allocated zeroed page (struct
 * sie_page, asserted at build time to be exactly 4096 bytes). The SIE
 * block is pointed at the page's itdb, given mso = 0, msl = sclp.hamax
 * and icpua = id; the local interrupt state is linked to the VM's
 * floating interrupts, the vcpu's wait queue and the SIE cpuflags; the
 * cputm seqcount is initialized. kvm_vcpu_init() completes the common
 * setup. The labels at the end free the SIE page and the vcpu on failure.
 */
2474 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2477 struct kvm_vcpu *vcpu;
2478 struct sie_page *sie_page;
2481 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2486 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2490 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2491 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2495 vcpu->arch.sie_block = &sie_page->sie_block;
2496 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2498 /* the real guest size will always be smaller than msl */
2499 vcpu->arch.sie_block->mso = 0;
2500 vcpu->arch.sie_block->msl = sclp.hamax;
2502 vcpu->arch.sie_block->icpua = id;
2503 spin_lock_init(&vcpu->arch.local_int.lock);
2504 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2505 vcpu->arch.local_int.wq = &vcpu->wq;
2506 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2507 seqcount_init(&vcpu->arch.cputm_seqcount);
2509 rc = kvm_vcpu_init(vcpu, kvm, id);
2511 goto out_free_sie_block;
2512 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2513 vcpu->arch.sie_block);
2514 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2518 free_page((unsigned long)(vcpu->arch.sie_block));
2520 kmem_cache_free(kvm_vcpu_cache, vcpu);
/*
 * A vCPU counts as runnable when kvm_s390_vcpu_has_irq(), called with a
 * second argument of 0, reports a pending interrupt.
 */
2525 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2527 return kvm_s390_vcpu_has_irq(vcpu, 0);
/*
 * Returns true when PSW_MASK_PSTATE is clear in the guest PSW mask, and
 * false when it is set.
 */
2530 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2532 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
/* Atomically set PROG_BLOCK_SIE in the SIE block's prog20 word. */
2535 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2537 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Atomically clear PROG_BLOCK_SIE in the SIE block's prog20 word. */
2541 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2543 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Atomically set PROG_REQUEST in the SIE block's prog20 word. */
2546 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2548 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
/* Atomically clear PROG_REQUEST in the SIE block's prog20 word. */
2552 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2554 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2558 * Kick a guest cpu out of SIE and wait until SIE is not running.
2559 * If the CPU is not running (e.g. waiting as idle) the function will
2560 * return immediately. */
/*
 * Set CPUSTAT_STOP_INT in the vCPU's cpuflags, then loop for as long as
 * PROG_IN_SIE is set in the SIE block's prog0c field.
 */
2561 void exit_sie(struct kvm_vcpu *vcpu)
2563 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2564 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2568 /* Kick a guest cpu out of SIE to process a request synchronously */
/*
 * Post request req on the vCPU with kvm_make_request() and flag it in the
 * SIE block through kvm_s390_vcpu_request().
 */
2569 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2571 kvm_make_request(req, vcpu);
2572 kvm_s390_vcpu_request(vcpu);
/*
 * gmap notifier for an address range [start, end]. Shadow gmaps and
 * ranges starting at or above 2 GiB (1UL << 31) are filtered first. For
 * every vCPU whose two prefix pages overlap the range, the event is
 * logged and a synchronous KVM_REQ_MMU_RELOAD request is issued.
 */
2575 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2578 struct kvm *kvm = gmap->private;
2579 struct kvm_vcpu *vcpu;
2580 unsigned long prefix;
2583 if (gmap_is_shadow(gmap))
2585 if (start >= 1UL << 31)
2586 /* We are only interested in prefix pages */
2588 kvm_for_each_vcpu(i, vcpu, kvm) {
2589 /* match against both prefix pages */
2590 prefix = kvm_s390_get_prefix(vcpu);
/* interval overlap test: [start, end] vs [prefix, prefix + 2 pages - 1] */
2591 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2592 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2594 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
/*
 * Present only because common KVM code references the symbol; according
 * to the note below it is never actually called on s390.
 */
2599 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2601 /* kvm common code refers to this, but never calls it */
/*
 * Read a single register for KVM_GET_ONE_REG and write it to the user
 * address in reg->addr with put_user(). TODPR is transferred as a u32,
 * all other registers handled here as u64. The CPU timer is read through
 * kvm_s390_get_cpu_timer() rather than taken straight from the SIE
 * block.
 */
2606 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2607 struct kvm_one_reg *reg)
2612 case KVM_REG_S390_TODPR:
2613 r = put_user(vcpu->arch.sie_block->todpr,
2614 (u32 __user *)reg->addr);
2616 case KVM_REG_S390_EPOCHDIFF:
2617 r = put_user(vcpu->arch.sie_block->epoch,
2618 (u64 __user *)reg->addr);
2620 case KVM_REG_S390_CPU_TIMER:
2621 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2622 (u64 __user *)reg->addr);
2624 case KVM_REG_S390_CLOCK_COMP:
2625 r = put_user(vcpu->arch.sie_block->ckc,
2626 (u64 __user *)reg->addr);
2628 case KVM_REG_S390_PFTOKEN:
2629 r = put_user(vcpu->arch.pfault_token,
2630 (u64 __user *)reg->addr);
2632 case KVM_REG_S390_PFCOMPARE:
2633 r = put_user(vcpu->arch.pfault_compare,
2634 (u64 __user *)reg->addr);
2636 case KVM_REG_S390_PFSELECT:
2637 r = put_user(vcpu->arch.pfault_select,
2638 (u64 __user *)reg->addr);
2640 case KVM_REG_S390_PP:
2641 r = put_user(vcpu->arch.sie_block->pp,
2642 (u64 __user *)reg->addr);
2644 case KVM_REG_S390_GBEA:
2645 r = put_user(vcpu->arch.sie_block->gbea,
2646 (u64 __user *)reg->addr);
/*
 * Handle KVM_SET_ONE_REG: load one vcpu register from user space.
 *
 * Mirror image of kvm_arch_vcpu_ioctl_get_one_reg(): the value is fetched
 * with get_user() from reg->addr (u32 for TODPR, u64 otherwise) and stored
 * directly into the SIE block or vcpu->arch. Two registers get extra
 * handling:
 *  - CPU_TIMER is read into the local 'val' and applied through
 *    kvm_s390_set_cpu_timer();
 *  - PFTOKEN: when the new token equals KVM_S390_PFAULT_TOKEN_INVALID the
 *    async page-fault completion queue is cleared.
 *
 * NOTE(review): the switch selector, break statements, default case, the
 * guard around kvm_s390_set_cpu_timer() and the return are not visible in
 * this listing.
 */
2655 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2656 struct kvm_one_reg *reg)
2662 case KVM_REG_S390_TODPR:
2663 r = get_user(vcpu->arch.sie_block->todpr,
2664 (u32 __user *)reg->addr);
2666 case KVM_REG_S390_EPOCHDIFF:
2667 r = get_user(vcpu->arch.sie_block->epoch,
2668 (u64 __user *)reg->addr);
2670 case KVM_REG_S390_CPU_TIMER:
2671 r = get_user(val, (u64 __user *)reg->addr);
2673 kvm_s390_set_cpu_timer(vcpu, val);
2675 case KVM_REG_S390_CLOCK_COMP:
2676 r = get_user(vcpu->arch.sie_block->ckc,
2677 (u64 __user *)reg->addr);
2679 case KVM_REG_S390_PFTOKEN:
2680 r = get_user(vcpu->arch.pfault_token,
2681 (u64 __user *)reg->addr);
2682 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2683 kvm_clear_async_pf_completion_queue(vcpu);
2685 case KVM_REG_S390_PFCOMPARE:
2686 r = get_user(vcpu->arch.pfault_compare,
2687 (u64 __user *)reg->addr);
2689 case KVM_REG_S390_PFSELECT:
2690 r = get_user(vcpu->arch.pfault_select,
2691 (u64 __user *)reg->addr);
2693 case KVM_REG_S390_PP:
2694 r = get_user(vcpu->arch.sie_block->pp,
2695 (u64 __user *)reg->addr);
2697 case KVM_REG_S390_GBEA:
2698 r = get_user(vcpu->arch.sie_block->gbea,
2699 (u64 __user *)reg->addr);
/*
 * Handle KVM_S390_INITIAL_RESET by delegating to
 * kvm_s390_vcpu_initial_reset().
 * NOTE(review): the return statement is not visible in this listing.
 */
2708 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2710 kvm_s390_vcpu_initial_reset(vcpu);
2714 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2716 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2720 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2722 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/*
 * Load the special registers supplied by user space: the access registers
 * go into the shared kvm_run area (run->s.regs.acrs), the control registers
 * into the SIE block (sie_block->gcr). Copy sizes are taken from the
 * kvm_sregs fields.
 */
2726 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2727 struct kvm_sregs *sregs)
2729 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2730 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * Return the special registers to user space: access registers are copied
 * from run->s.regs.acrs and control registers from sie_block->gcr into
 * @sregs.
 */
2734 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2735 struct kvm_sregs *sregs)
2737 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2738 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * Load the floating point state supplied by user space.
 *
 * fpu->fpc is validated with test_fp_ctl() and then stored in
 * run->s.regs.fpc. The floating point registers are either converted into
 * the vector register area with convert_fp_to_vx() or copied verbatim into
 * run->s.regs.fprs.
 * NOTE(review): the condition choosing between the two paths, the error
 * return for an invalid fpc and the final return are not visible in this
 * listing.
 */
2742 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2744 if (test_fp_ctl(fpu->fpc))
2746 vcpu->run->s.regs.fpc = fpu->fpc;
2748 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2749 (freg_t *) fpu->fprs);
2751 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/*
 * Return the floating point state to user space.
 *
 * The floating point registers are produced either by converting the vector
 * register area with convert_vx_to_fp() or by copying run->s.regs.fprs;
 * fpu->fpc is taken from run->s.regs.fpc.
 * NOTE(review): the call that refreshes the values and the condition
 * choosing between the two paths are not visible in this listing.
 */
2755 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2757 /* make sure we have the latest values */
2760 convert_vx_to_fp((freg_t *) fpu->fprs,
2761 (__vector128 *) vcpu->run->s.regs.vrs);
2763 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2764 fpu->fpc = vcpu->run->s.regs.fpc;
/*
 * Handle KVM_S390_SET_INITIAL_PSW: store @psw (mask and address) in the
 * shared kvm_run area. The vcpu state is checked with is_vcpu_stopped()
 * first.
 * NOTE(review): what happens for a vcpu that is not stopped (presumably an
 * error is recorded) and the return statement are not visible in this
 * listing.
 */
2768 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2772 if (!is_vcpu_stopped(vcpu))
2775 vcpu->run->psw_mask = psw.mask;
2776 vcpu->run->psw_addr = psw.addr;
/*
 * Address translation ioctl: not implemented, always returns -EINVAL.
 * Both parameters are unused.
 */
2781 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2782 struct kvm_translation *tr)
2784 return -EINVAL; /* not implemented yet */
/*
 * VALID_GUESTDBG_FLAGS lists the only dbg->control bits accepted by
 * kvm_arch_vcpu_ioctl_set_guest_debug() below.
 */
2787 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2788 KVM_GUESTDBG_USE_HW_BP | \
2789 KVM_GUESTDBG_ENABLE)
/*
 * Configure guest debugging for @vcpu.
 *
 * Any previous configuration is dropped first (guest_debug = 0, breakpoint
 * data cleared). Control bits outside VALID_GUESTDBG_FLAGS and machines
 * without sclp.has_gpere are checked next.
 *
 * With KVM_GUESTDBG_ENABLE the control word is stored in vcpu->guest_debug,
 * CPUSTAT_P is set in the SIE block's cpuflags and, if requested through
 * KVM_GUESTDBG_USE_HW_BP, the breakpoint data is imported. Otherwise
 * CPUSTAT_P is cleared and guestdbg.last_bp is reset.
 *
 * The last three statements undo the configuration again.
 * NOTE(review): the error returns and the condition guarding that final
 * cleanup (presumably a non-zero rc) are not visible in this listing.
 */
2791 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2792 struct kvm_guest_debug *dbg)
2796 vcpu->guest_debug = 0;
2797 kvm_s390_clear_bp_data(vcpu);
2799 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2801 if (!sclp.has_gpere)
2804 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2805 vcpu->guest_debug = dbg->control;
2806 /* enforce guest PER */
2807 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2809 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2810 rc = kvm_s390_import_bp_data(vcpu, dbg);
2812 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2813 vcpu->arch.guestdbg.last_bp = 0;
2817 vcpu->guest_debug = 0;
2818 kvm_s390_clear_bp_data(vcpu);
2819 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
/*
 * Report the multiprocessing state of @vcpu through the return value:
 * KVM_MP_STATE_STOPPED if is_vcpu_stopped() says so, otherwise
 * KVM_MP_STATE_OPERATING. @mp_state is not written in the visible lines.
 */
2825 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2826 struct kvm_mp_state *mp_state)
2828 /* CHECK_STOP and LOAD are not supported yet */
2829 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2830 KVM_MP_STATE_OPERATING;
/*
 * Set the multiprocessing state of @vcpu.
 *
 * Using this ioctl switches the whole VM to user-controlled cpu state
 * (kvm->arch.user_cpu_state_ctrl = 1). STOPPED and OPERATING map to
 * kvm_s390_vcpu_stop() / kvm_s390_vcpu_start(); LOAD and CHECK_STOP are
 * listed but, per the comment, not supported.
 * NOTE(review): break statements, the default case and the return value are
 * not visible in this listing.
 */
2833 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2834 struct kvm_mp_state *mp_state)
2838 /* user space knows about this interface - let it control the state */
2839 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2841 switch (mp_state->mp_state) {
2842 case KVM_MP_STATE_STOPPED:
2843 kvm_s390_vcpu_stop(vcpu);
2845 case KVM_MP_STATE_OPERATING:
2846 kvm_s390_vcpu_start(vcpu);
2848 case KVM_MP_STATE_LOAD:
2849 case KVM_MP_STATE_CHECK_STOP:
2850 /* fall through - CHECK_STOP and LOAD are not supported yet */
/* Return true if CPUSTAT_IBS is currently set in the SIE block's cpuflags. */
2858 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2860 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
/*
 * Process the requests pending for @vcpu.
 *
 * PROG_REQUEST is cleared first (kvm_s390_vcpu_request_handled()), then
 * kvm_request_pending() is consulted. The visible request handling is:
 *  - KVM_REQ_MMU_RELOAD: re-protect the two prefix pages with
 *    gmap_mprotect_notify(..., PAGE_SIZE * 2, PROT_WRITE); the request is
 *    queued again afterwards (presumably only when that call fails - TODO
 *    confirm);
 *  - KVM_REQ_TLB_FLUSH: set sie_block->ihcpu to 0xffff;
 *  - KVM_REQ_ENABLE_IBS / KVM_REQ_DISABLE_IBS: set / clear CPUSTAT_IBS if
 *    it is not already in the requested state, with a trace event;
 *  - KVM_REQ_ICPT_OPEREXC: set ICTL_OPEREXC in sie_block->ictl;
 *  - KVM_REQ_START_MIGRATION: clear ECB2_CMMA in sie_block->ecb2;
 *  - KVM_REQ_STOP_MIGRATION: set ECB2_CMMA again when both
 *    kvm->arch.use_cmma and kvm->mm->context.use_cmma are set;
 *  - KVM_REQ_UNHALT: cleared without further action.
 *
 * NOTE(review): return statements and retry jumps are not visible in this
 * listing.
 */
2863 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2866 kvm_s390_vcpu_request_handled(vcpu);
2867 if (!kvm_request_pending(vcpu))
2870 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2871 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2872 * This ensures that the ipte instruction for this request has
2873 * already finished. We might race against a second unmapper that
2874 * wants to set the blocking bit. Lets just retry the request loop.
2876 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2878 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2879 kvm_s390_get_prefix(vcpu),
2880 PAGE_SIZE * 2, PROT_WRITE);
2882 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2888 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2889 vcpu->arch.sie_block->ihcpu = 0xffff;
2893 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2894 if (!ibs_enabled(vcpu)) {
2895 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2896 atomic_or(CPUSTAT_IBS,
2897 &vcpu->arch.sie_block->cpuflags);
2902 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2903 if (ibs_enabled(vcpu)) {
2904 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2905 atomic_andnot(CPUSTAT_IBS,
2906 &vcpu->arch.sie_block->cpuflags);
2911 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2912 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2916 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2918 * Disable CMMA virtualization; we will emulate the ESSA
2919 * instruction manually, in order to provide additional
2920 * functionalities needed for live migration.
2922 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2926 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2928 * Re-enable CMMA virtualization if CMMA is available and
2931 if ((vcpu->kvm->arch.use_cmma) &&
2932 (vcpu->kvm->mm->context.use_cmma))
2933 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2937 /* nothing to do, just clear the request */
2938 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/*
 * Set the guest TOD clock including the epoch index.
 *
 * Under kvm->lock the host's extended TOD is read and the VM-wide epoch and
 * epoch index are computed as guest minus host. If the resulting epoch is
 * larger than the requested guest TOD, one is borrowed from the epoch index
 * (presumably because the unsigned subtraction wrapped - TODO confirm the
 * field types). All vcpus are then blocked, every SIE block receives the
 * new epoch/epdx pair, and the vcpus are unblocked again.
 * NOTE(review): some lines of the body are not visible in this listing.
 */
2943 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2944 const struct kvm_s390_vm_tod_clock *gtod)
2946 struct kvm_vcpu *vcpu;
2947 struct kvm_s390_tod_clock_ext htod;
2950 mutex_lock(&kvm->lock);
2953 get_tod_clock_ext((char *)&htod);
2955 kvm->arch.epoch = gtod->tod - htod.tod;
2956 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2958 if (kvm->arch.epoch > gtod->tod)
2959 kvm->arch.epdx -= 1;
2961 kvm_s390_vcpu_block_all(kvm);
2962 kvm_for_each_vcpu(i, vcpu, kvm) {
2963 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2964 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
2967 kvm_s390_vcpu_unblock_all(kvm);
2969 mutex_unlock(&kvm->lock);
/*
 * Set the guest TOD clock to @tod.
 *
 * Under kvm->lock the VM-wide epoch is set to @tod minus the current host
 * TOD (get_tod_clock()). All vcpus are blocked while the new epoch is
 * copied into each SIE block, then unblocked again.
 */
2972 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2974 struct kvm_vcpu *vcpu;
2977 mutex_lock(&kvm->lock);
2979 kvm->arch.epoch = tod - get_tod_clock();
2980 kvm_s390_vcpu_block_all(kvm);
2981 kvm_for_each_vcpu(i, vcpu, kvm)
2982 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2983 kvm_s390_vcpu_unblock_all(kvm);
2985 mutex_unlock(&kvm->lock);
/*
 * Thin wrapper around gmap_fault() on the vcpu's gmap: a non-zero @writable
 * is translated into FAULT_FLAG_WRITE, zero into no fault flags. The
 * gmap_fault() result is returned unchanged.
 */
2989 * kvm_arch_fault_in_page - fault-in guest page if necessary
2990 * @vcpu: The corresponding virtual cpu
2991 * @gpa: Guest physical address
2992 * @writable: Whether the page should be writable or not
2994 * Make sure that a guest page has been faulted-in on the host.
2996 * Return: Zero on success, negative error code otherwise.
2998 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3000 return gmap_fault(vcpu->arch.gmap, gpa,
3001 writable ? FAULT_FLAG_WRITE : 0);
/*
 * Inject a pfault notification carrying @token.
 *
 * Two variants are visible:
 *  - KVM_S390_INT_PFAULT_INIT is injected into @vcpu itself through
 *    kvm_s390_inject_vcpu(), with the token in ext_params2;
 *  - KVM_S390_INT_PFAULT_DONE is injected VM-wide through
 *    kvm_s390_inject_vm(), with the token in parm64.
 * A non-zero result from either injection triggers WARN_ON_ONCE().
 * NOTE(review): the branch on @start_token selecting between the two
 * variants is not visible in this listing (presumably true selects INIT,
 * matching the callers - TODO confirm).
 */
3004 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3005 unsigned long token)
3007 struct kvm_s390_interrupt inti;
3008 struct kvm_s390_irq irq;
3011 irq.u.ext.ext_params2 = token;
3012 irq.type = KVM_S390_INT_PFAULT_INIT;
3013 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3015 inti.type = KVM_S390_INT_PFAULT_DONE;
3016 inti.parm64 = token;
3017 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/*
 * Async page fault hook: trace the event and inject the pfault token of
 * @work with start_token = true.
 */
3021 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3022 struct kvm_async_pf *work)
3024 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3025 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/*
 * Async page fault hook: trace the event and inject the pfault token of
 * @work with start_token = false.
 */
3028 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3029 struct kvm_async_pf *work)
3031 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3032 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
/*
 * Async page fault hook with no visible work on s390; see the comment in
 * the body for the reason.
 */
3035 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3036 struct kvm_async_pf *work)
3038 /* s390 will always inject the page directly */
/*
 * Async page fault hook asking whether a "page present" notification can be
 * injected right now.
 * NOTE(review): the return statement is not visible in this listing; the
 * comment in the body suggests the answer is chosen so that completed work
 * still gets cleaned up - TODO confirm.
 */
3041 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3044 * s390 will always inject the page directly,
3045 * but we still want check_async_completion to cleanup
/*
 * Try to set up an asynchronous page fault for the address recorded in
 * current->thread.gmap_addr.
 *
 * A series of preconditions is tested first: the pfault token must not be
 * KVM_S390_PFAULT_TOKEN_INVALID, the guest PSW mask filtered by
 * pfault_select must equal pfault_compare, external interrupts must not be
 * disabled in the PSW, no interrupt may be pending for the vcpu, bit 0x200
 * must be set in guest control register 0 and pfault must be enabled on the
 * gmap.
 *
 * Afterwards the host virtual address of the faulting guest address is
 * computed, 8 bytes are read from guest real address pfault_token into
 * arch.pfault_token, and the request is handed to kvm_setup_async_pf().
 * NOTE(review): the return statements taken when a precondition fails and
 * the final return are not visible in this listing.
 */
3050 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3053 struct kvm_arch_async_pf arch;
3056 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3058 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3059 vcpu->arch.pfault_compare)
3061 if (psw_extint_disabled(vcpu))
3063 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3065 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3067 if (!vcpu->arch.gmap->pfault_enabled)
3070 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3071 hva += current->thread.gmap_addr & ~PAGE_MASK;
3072 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3075 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/*
 * Prepare @vcpu for entering SIE.
 *
 * Visible steps, in order: housekeeping for completed async page faults,
 * copying guest gprs 14 and 15 from kvm_run into the SIE block (gg14/gg15),
 * a check for a pending host machine check (CIF_MCCK_PENDING), delivery of
 * pending interrupts (skipped for ucontrol VMs), request processing via
 * kvm_s390_handle_requests(), and PER register backup/patching when guest
 * debugging is enabled. Finally the intercept code is reset to 0 and the
 * current cpuflags are logged and traced.
 * NOTE(review): the reactions to the individual checks and the return
 * statements are not visible in this listing.
 */
3079 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3084 * On s390 notifications for arriving pages will be delivered directly
3085 * to the guest but the house keeping for completed pfaults is
3086 * handled outside the worker.
3088 kvm_check_async_pf_completion(vcpu);
3090 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3091 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3096 if (test_cpu_flag(CIF_MCCK_PENDING))
3099 if (!kvm_is_ucontrol(vcpu->kvm)) {
3100 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3105 rc = kvm_s390_handle_requests(vcpu);
3109 if (guestdbg_enabled(vcpu)) {
3110 kvm_s390_backup_guest_per_regs(vcpu);
3111 kvm_s390_patch_guest_per_regs(vcpu);
3114 vcpu->arch.sie_block->icptcode = 0;
3115 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3116 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3117 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/*
 * Handle a host fault that hit while the guest was running in SIE by
 * injecting a program interrupt into the guest.
 *
 * The default interrupt is PGM_ADDRESSING. The first byte of the
 * instruction at the guest PSW address is read to derive the instruction
 * length; one path replaces pgm_info with vcpu->arch.pgm instead. The
 * length is recorded in pgm_info.flags together with
 * KVM_S390_PGM_FLAGS_ILC_VALID, the guest PSW is forwarded by that length
 * and the interrupt is injected; the injection result is returned.
 * NOTE(review): the branches on the read result are not visible in this
 * listing.
 */
3122 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3124 struct kvm_s390_pgm_info pgm_info = {
3125 .code = PGM_ADDRESSING,
3130 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3131 trace_kvm_s390_sie_fault(vcpu);
3134 * We want to inject an addressing exception, which is defined as a
3135 * suppressing or terminating exception. However, since we came here
3136 * by a DAT access exception, the PSW still points to the faulting
3137 * instruction since DAT exceptions are nullifying. So we've got
3138 * to look up the current opcode to get the length of the instruction
3139 * to be able to forward the PSW.
3141 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3142 ilen = insn_length(opcode);
3146 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3147 * Forward by arbitrary ilc, injection will take care of
3148 * nullification if necessary.
3150 pgm_info = vcpu->arch.pgm;
3153 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3154 kvm_s390_forward_psw(vcpu, ilen);
3155 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/*
 * Evaluate the outcome of one SIE run.
 *
 * After logging/tracing the intercept code, restoring the PER registers
 * (when guest debugging is enabled) and copying gg14/gg15 back into
 * kvm_run, the exit is dispatched:
 *  - @exit_reason == -EINTR: a machine check hit; the mcck info stored in
 *    the sie_page is reinjected;
 *  - intercept code > 0: kvm_handle_sie_intercept() is called; when it
 *    returns -EOPNOTSUPP the intercept is passed to user space as
 *    KVM_EXIT_S390_SIEIC with icptcode/ipa/ipb filled in;
 *  - @exit_reason != -EFAULT: counted as exit_null;
 *  - ucontrol VM: reported as KVM_EXIT_S390_UCONTROL with the faulting
 *    address as trans_exc_code and pgm_code 0x10;
 *  - current->thread.gmap_pfault set: the flag is cleared, an async page
 *    fault is attempted and otherwise the page is faulted in synchronously
 *    for write;
 *  - anything else: vcpu_post_run_fault_in_sie().
 * NOTE(review): several return statements are not visible in this listing.
 */
3158 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3160 struct mcck_volatile_info *mcck_info;
3161 struct sie_page *sie_page;
3163 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3164 vcpu->arch.sie_block->icptcode);
3165 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3167 if (guestdbg_enabled(vcpu))
3168 kvm_s390_restore_guest_per_regs(vcpu);
3170 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3171 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3173 if (exit_reason == -EINTR) {
3174 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3175 sie_page = container_of(vcpu->arch.sie_block,
3176 struct sie_page, sie_block);
3177 mcck_info = &sie_page->mcck_info;
3178 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3182 if (vcpu->arch.sie_block->icptcode > 0) {
3183 int rc = kvm_handle_sie_intercept(vcpu);
3185 if (rc != -EOPNOTSUPP)
3187 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3188 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3189 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3190 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3192 } else if (exit_reason != -EFAULT) {
3193 vcpu->stat.exit_null++;
3195 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3196 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3197 vcpu->run->s390_ucontrol.trans_exc_code =
3198 current->thread.gmap_addr;
3199 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3201 } else if (current->thread.gmap_pfault) {
3202 trace_kvm_s390_major_guest_pfault(vcpu);
3203 current->thread.gmap_pfault = 0;
3204 if (kvm_arch_setup_async_pf(vcpu))
3206 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3208 return vcpu_post_run_fault_in_sie(vcpu);
/*
 * Inner run loop of a vcpu.
 *
 * kvm->srcu is held in read mode around vcpu_pre_run() and vcpu_post_run()
 * and dropped while the guest executes. Each iteration switches to guest
 * context (guest_enter_irqoff(), cpu timer accounting off), runs the guest
 * through sie64a() and switches back (cpu timer accounting on,
 * guest_exit_irqoff()). The loop repeats until a signal is pending for the
 * current task, a guest debug exit is pending or vcpu_post_run() returns
 * non-zero.
 * NOTE(review): this listing shows two local_irq_disable() calls and no
 * local_irq_enable(); the matching enables and the early exit after
 * vcpu_pre_run() are presumably among the dropped lines - TODO confirm.
 */
3211 static int __vcpu_run(struct kvm_vcpu *vcpu)
3213 int rc, exit_reason;
3216 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3217 * ning the guest), so that memslots (and other stuff) are protected
3219 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3222 rc = vcpu_pre_run(vcpu);
3226 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3228 * As PF_VCPU will be used in fault handler, between
3229 * guest_enter and guest_exit should be no uaccess.
3231 local_irq_disable();
3232 guest_enter_irqoff();
3233 __disable_cpu_timer_accounting(vcpu);
3235 exit_reason = sie64a(vcpu->arch.sie_block,
3236 vcpu->run->s.regs.gprs);
3237 local_irq_disable();
3238 __enable_cpu_timer_accounting(vcpu);
3239 guest_exit_irqoff();
3241 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3243 rc = vcpu_post_run(vcpu, exit_reason);
3244 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3246 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/*
 * Transfer register state from the shared kvm_run area into the vcpu before
 * running the guest, and switch the host task to the guest register sets.
 *
 * The guest PSW is always taken from kvm_run. Further state is only copied
 * when the matching bit is set in kvm_run->kvm_dirty_regs:
 *  - KVM_SYNC_PREFIX: prefix register;
 *  - KVM_SYNC_CRS: 128 bytes of control registers, followed by a
 *    KVM_REQ_TLB_FLUSH request;
 *  - KVM_SYNC_ARCH0: cpu timer, ckc, todpr, pp, gbea;
 *  - KVM_SYNC_PFAULT: pfault token/select/compare; an invalid token clears
 *    the async page-fault completion queue;
 *  - KVM_SYNC_RICCB (with facility 64): sets ECB3_RI if not yet set;
 *  - KVM_SYNC_GSCB (with facility 133): sets ECB_GS and ECD_HOSTREGMGMT and
 *    marks gs_enabled.
 *
 * Then the host access registers are saved and the guest ones loaded, the
 * host fpc/regs pointers are remembered in vcpu->arch.host_fpregs and
 * current->thread.fpu is pointed at the guest vrs or fprs area (an fpc
 * rejected by test_fp_ctl() is replaced by 0). On MACHINE_HAS_GS machines
 * the host guarded-storage control block is saved and the guest one loaded
 * when gs_enabled. Finally kvm_dirty_regs is cleared.
 * NOTE(review): some conditions (e.g. the riccb/gscb validity checks and
 * the vrs/fprs selection) are not visible in this listing.
 */
3250 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3252 struct runtime_instr_cb *riccb;
3255 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3256 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3257 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3258 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3259 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3260 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3261 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3262 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3263 /* some control register changes require a tlb flush */
3264 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3266 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3267 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3268 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3269 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3270 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3271 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3273 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3274 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3275 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3276 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3277 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3278 kvm_clear_async_pf_completion_queue(vcpu);
3281 * If userspace sets the riccb (e.g. after migration) to a valid state,
3282 * we should enable RI here instead of doing the lazy enablement.
3284 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3285 test_kvm_facility(vcpu->kvm, 64) &&
3287 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3288 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3289 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3292 * If userspace sets the gscb (e.g. after migration) to non-zero,
3293 * we should enable GS here instead of doing the lazy enablement.
3295 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3296 test_kvm_facility(vcpu->kvm, 133) &&
3298 !vcpu->arch.gs_enabled) {
3299 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3300 vcpu->arch.sie_block->ecb |= ECB_GS;
3301 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3302 vcpu->arch.gs_enabled = 1;
3304 save_access_regs(vcpu->arch.host_acrs);
3305 restore_access_regs(vcpu->run->s.regs.acrs);
3306 /* save host (userspace) fprs/vrs */
3308 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3309 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3311 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3313 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3314 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3315 if (test_fp_ctl(current->thread.fpu.fpc))
3316 /* User space provided an invalid FPC, let's clear it */
3317 current->thread.fpu.fpc = 0;
3318 if (MACHINE_HAS_GS) {
3320 __ctl_set_bit(2, 4);
3321 if (current->thread.gs_cb) {
3322 vcpu->arch.host_gscb = current->thread.gs_cb;
3323 save_gs_cb(vcpu->arch.host_gscb);
3325 if (vcpu->arch.gs_enabled) {
3326 current->thread.gs_cb = (struct gs_cb *)
3327 &vcpu->run->s.regs.gscb;
3328 restore_gs_cb(current->thread.gs_cb);
3333 kvm_run->kvm_dirty_regs = 0;
/*
 * Counterpart of sync_regs(): publish the vcpu register state in the shared
 * kvm_run area after running the guest and switch the host task back to its
 * own register sets.
 *
 * PSW, prefix, 128 bytes of control registers, cpu timer, ckc, todpr, pp,
 * gbea and the pfault token/select/compare are copied unconditionally. The
 * guest access registers are saved into kvm_run and the host ones restored.
 * The guest fpc is stored, and current->thread.fpu is pointed back at the
 * host fpc/regs saved in vcpu->arch.host_fpregs. On MACHINE_HAS_GS machines
 * the guest guarded-storage control block is saved when gs_enabled, the
 * host block is reinstated, control register 2 bit 4 is cleared again if
 * there was no host block, and host_gscb is reset to NULL.
 * NOTE(review): the call that saves the guest fpu registers is not visible
 * in this listing.
 */
3336 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3338 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3339 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3340 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3341 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3342 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3343 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3344 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3345 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3346 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3347 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3348 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3349 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3350 save_access_regs(vcpu->run->s.regs.acrs);
3351 restore_access_regs(vcpu->arch.host_acrs);
3352 /* Save guest register state */
3354 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3355 /* Restore will be done lazily at return */
3356 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3357 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3358 if (MACHINE_HAS_GS) {
3359 __ctl_set_bit(2, 4);
3360 if (vcpu->arch.gs_enabled)
3361 save_gs_cb(current->thread.gs_cb);
3363 current->thread.gs_cb = vcpu->arch.host_gscb;
3364 restore_gs_cb(vcpu->arch.host_gscb);
3366 if (!vcpu->arch.host_gscb)
3367 __ctl_clear_bit(2, 4);
3368 vcpu->arch.host_gscb = NULL;
/*
 * KVM_RUN entry point for s390.
 *
 * Checks kvm_run->immediate_exit and a pending guest debug exit first, then
 * installs the vcpu signal mask if one is active. Unless user space
 * controls the cpu state the vcpu is started implicitly; with user control
 * a stopped vcpu is reported through pr_err_ratelimited(). Register state
 * is synced in, cpu timer accounting is enabled and __vcpu_run() executes
 * the guest.
 *
 * Afterwards a pending signal (with rc == 0) is reported as KVM_EXIT_INTR,
 * a pending guest debug exit is prepared, and -EREMOTE marks an exit that
 * user space has to handle. Cpu timer accounting is disabled, register
 * state is stored back, the signal mask is restored and the exit_userspace
 * statistic is incremented.
 * NOTE(review): the rc assignments and return statements are not visible in
 * this listing.
 */
3373 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3378 if (kvm_run->immediate_exit)
3381 if (guestdbg_exit_pending(vcpu)) {
3382 kvm_s390_prepare_debug_exit(vcpu);
3386 if (vcpu->sigset_active)
3387 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3389 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3390 kvm_s390_vcpu_start(vcpu);
3391 } else if (is_vcpu_stopped(vcpu)) {
3392 pr_err_ratelimited("can't run stopped vcpu %d\n",
3397 sync_regs(vcpu, kvm_run);
3398 enable_cpu_timer_accounting(vcpu);
3401 rc = __vcpu_run(vcpu);
3403 if (signal_pending(current) && !rc) {
3404 kvm_run->exit_reason = KVM_EXIT_INTR;
3408 if (guestdbg_exit_pending(vcpu) && !rc) {
3409 kvm_s390_prepare_debug_exit(vcpu);
3413 if (rc == -EREMOTE) {
3414 /* userspace support is needed, kvm_run has been prepared */
3418 disable_cpu_timer_accounting(vcpu);
3419 store_regs(vcpu, kvm_run);
3421 if (vcpu->sigset_active)
3422 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3424 vcpu->stat.exit_userspace++;
/*
 * kvm_s390_store_status_unloaded: write the vcpu status into a guest save
 * area.
 *
 * For KVM_S390_STORE_STATUS_NOADDR and KVM_S390_STORE_STATUS_PREFIXED the
 * byte 'archmode' (1) is first written to guest address 163 (absolute resp.
 * real). In the remaining case __LC_FPREGS_SAVE_AREA is subtracted from
 * @gpa so that the __LC_* offsets used below can be applied uniformly.
 *
 * The floating point registers are stored from a converted copy of the
 * vector registers on MACHINE_HAS_VX machines, otherwise directly from
 * run->s.regs.fprs. Then gprs, PSW, prefix, fpc, todpr, cpu timer, clock
 * comparator (ckc >> 8), access registers and control registers follow.
 * The individual write results are OR-ed together.
 *
 * Returns 0 on success and -EFAULT if any write failed.
 * NOTE(review): the gpa selection in the special cases and some arguments
 * are not visible in this listing.
 */
3429 * store status at address
3430 * we use have two special cases:
3431 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3432 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3434 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3436 unsigned char archmode = 1;
3437 freg_t fprs[NUM_FPRS];
3442 px = kvm_s390_get_prefix(vcpu);
3443 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3444 if (write_guest_abs(vcpu, 163, &archmode, 1))
3447 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3448 if (write_guest_real(vcpu, 163, &archmode, 1))
3452 gpa -= __LC_FPREGS_SAVE_AREA;
3454 /* manually convert vector registers if necessary */
3455 if (MACHINE_HAS_VX) {
3456 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3457 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3460 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3461 vcpu->run->s.regs.fprs, 128);
3463 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3464 vcpu->run->s.regs.gprs, 128);
3465 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3466 &vcpu->arch.sie_block->gpsw, 16);
3467 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3469 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3470 &vcpu->run->s.regs.fpc, 4);
3471 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3472 &vcpu->arch.sie_block->todpr, 4);
3473 cputm = kvm_s390_get_cpu_timer(vcpu);
3474 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3476 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3477 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3479 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3480 &vcpu->run->s.regs.acrs, 64);
3481 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3482 &vcpu->arch.sie_block->gcr, 128);
3483 return rc ? -EFAULT : 0;
/*
 * Store the status of a vcpu whose registers may currently be loaded in the
 * host: the copies in kvm_run (fpc and access registers in the visible
 * lines) are refreshed first, then kvm_s390_store_status_unloaded() does
 * the actual store at @addr and its result is returned.
 * NOTE(review): the call refreshing the floating point registers is not
 * visible in this listing.
 */
3486 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3489 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3490 * switch in the run ioctl. Let's update our copies before we save
3491 * it into the save area
3494 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3495 save_access_regs(vcpu->run->s.regs.acrs);
3497 return kvm_s390_store_status_unloaded(vcpu, addr);
/*
 * Request IBS to be disabled on @vcpu: kvm_check_request() is called for
 * KVM_REQ_ENABLE_IBS with its result ignored (presumably to drop a pending
 * ENABLE request - TODO confirm), then KVM_REQ_DISABLE_IBS is issued as a
 * synchronous request.
 */
3500 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3502 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3503 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
/* Apply __disable_ibs_on_vcpu() to every vcpu of @kvm. */
3506 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3509 struct kvm_vcpu *vcpu;
3511 kvm_for_each_vcpu(i, vcpu, kvm) {
3512 __disable_ibs_on_vcpu(vcpu);
/*
 * Request IBS to be enabled on @vcpu: kvm_check_request() is called for
 * KVM_REQ_DISABLE_IBS with its result ignored (presumably to drop a pending
 * DISABLE request - TODO confirm), then KVM_REQ_ENABLE_IBS is issued as a
 * synchronous request.
 * NOTE(review): any guard at the top of the body is not visible in this
 * listing.
 */
3516 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3520 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3521 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
/*
 * Move @vcpu out of the STOPPED state.
 *
 * The stopped check comes first; the rest runs under
 * kvm->arch.start_stop_lock. The online vcpus that are not stopped are
 * inspected to decide about IBS: with no other started vcpu IBS is enabled
 * on this one, with exactly one other started vcpu IBS is disabled on all
 * vcpus. CPUSTAT_STOPPED is then cleared and a KVM_REQ_TLB_FLUSH request is
 * queued for the vcpu.
 * NOTE(review): the early return and the counter increment inside the loop
 * are not visible in this listing.
 */
3524 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3526 int i, online_vcpus, started_vcpus = 0;
3528 if (!is_vcpu_stopped(vcpu))
3531 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3532 /* Only one cpu at a time may enter/leave the STOPPED state. */
3533 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3534 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3536 for (i = 0; i < online_vcpus; i++) {
3537 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3541 if (started_vcpus == 0) {
3542 /* we're the only active VCPU -> speed it up */
3543 __enable_ibs_on_vcpu(vcpu);
3544 } else if (started_vcpus == 1) {
3546 * As we are starting a second VCPU, we have to disable
3547 * the IBS facility on all VCPUs to remove potentially
3548 * oustanding ENABLE requests.
3550 __disable_ibs_on_all_vcpus(vcpu->kvm);
3553 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3555 * Another VCPU might have used IBS while we were offline.
3556 * Let's play safe and flush the VCPU at startup.
3558 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3559 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Move @vcpu into the STOPPED state.
 *
 * The stopped check comes first; the rest runs under
 * kvm->arch.start_stop_lock. A pending stop irq is cleared,
 * CPUSTAT_STOPPED is set and IBS is disabled on this vcpu. The online vcpus
 * are then scanned for ones that are still started; if exactly one remains,
 * IBS is enabled on that vcpu.
 * NOTE(review): the early return and the counter increment inside the loop
 * are not visible in this listing.
 */
3563 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3565 int i, online_vcpus, started_vcpus = 0;
3566 struct kvm_vcpu *started_vcpu = NULL;
3568 if (is_vcpu_stopped(vcpu))
3571 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3572 /* Only one cpu at a time may enter/leave the STOPPED state. */
3573 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3574 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3576 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3577 kvm_s390_clear_stop_irq(vcpu);
3579 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3580 __disable_ibs_on_vcpu(vcpu);
3582 for (i = 0; i < online_vcpus; i++) {
3583 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3585 started_vcpu = vcpu->kvm->vcpus[i];
3589 if (started_vcpus == 1) {
3591 * As we only have one VCPU left, we want to enable the
3592 * IBS facility for that VCPU to speed it up.
3594 __enable_ibs_on_vcpu(started_vcpu);
3597 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Handle KVM_ENABLE_CAP on a vcpu.
 *
 * The only capability visible here is KVM_CAP_S390_CSS_SUPPORT: the first
 * time it is requested, kvm->arch.css_support is set to 1 and the change is
 * logged and traced; later requests find the flag already set.
 * NOTE(review): argument validation, the switch header, the default case
 * and the return value are not visible in this listing.
 */
3601 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3602 struct kvm_enable_cap *cap)
3610 case KVM_CAP_S390_CSS_SUPPORT:
3611 if (!vcpu->kvm->arch.css_support) {
3612 vcpu->kvm->arch.css_support = 1;
3613 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3614 trace_kvm_s390_enable_css(vcpu->kvm);
/*
 * Handle KVM_S390_MEM_OP: read or write guest logical memory on behalf of
 * user space.
 *
 * Flags other than KVM_S390_MEMOP_F_INJECT_EXCEPTION and
 * KVM_S390_MEMOP_F_CHECK_ONLY, and sizes above MEM_OP_MAX_SIZE, are checked
 * first. Unless CHECK_ONLY is set, a bounce buffer of mop->size bytes is
 * allocated with vmalloc(). The operation runs under the kvm->srcu read
 * lock:
 *  - LOGICAL_READ: with CHECK_ONLY only check_gva_range(GACC_FETCH) is
 *    performed; otherwise the guest data is read into the bounce buffer and
 *    copied to the user buffer;
 *  - LOGICAL_WRITE: with CHECK_ONLY only check_gva_range(GACC_STORE) is
 *    performed; otherwise the user data is copied into the bounce buffer
 *    and written to the guest.
 * A positive result combined with INJECT_EXCEPTION injects the program
 * interrupt recorded in vcpu->arch.pgm.
 * NOTE(review): error codes, the switch header/default case, freeing of the
 * bounce buffer and the return are not visible in this listing.
 */
3625 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3626 struct kvm_s390_mem_op *mop)
3628 void __user *uaddr = (void __user *)mop->buf;
3629 void *tmpbuf = NULL;
3631 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3632 | KVM_S390_MEMOP_F_CHECK_ONLY;
3634 if (mop->flags & ~supported_flags)
3637 if (mop->size > MEM_OP_MAX_SIZE)
3640 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3641 tmpbuf = vmalloc(mop->size);
3646 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3649 case KVM_S390_MEMOP_LOGICAL_READ:
3650 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3651 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3652 mop->size, GACC_FETCH);
3655 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3657 if (copy_to_user(uaddr, tmpbuf, mop->size))
3661 case KVM_S390_MEMOP_LOGICAL_WRITE:
3662 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3663 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3664 mop->size, GACC_STORE);
3667 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3671 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3677 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3679 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3680 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3686 long kvm_arch_vcpu_ioctl(struct file *filp,
3687 unsigned int ioctl, unsigned long arg)
3689 struct kvm_vcpu *vcpu = filp->private_data;
3690 void __user *argp = (void __user *)arg;
3695 case KVM_S390_IRQ: {
3696 struct kvm_s390_irq s390irq;
3699 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3701 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3704 case KVM_S390_INTERRUPT: {
3705 struct kvm_s390_interrupt s390int;
3706 struct kvm_s390_irq s390irq;
3709 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3711 if (s390int_to_s390irq(&s390int, &s390irq))
3713 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3716 case KVM_S390_STORE_STATUS:
3717 idx = srcu_read_lock(&vcpu->kvm->srcu);
3718 r = kvm_s390_vcpu_store_status(vcpu, arg);
3719 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3721 case KVM_S390_SET_INITIAL_PSW: {
3725 if (copy_from_user(&psw, argp, sizeof(psw)))
3727 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3730 case KVM_S390_INITIAL_RESET:
3731 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3733 case KVM_SET_ONE_REG:
3734 case KVM_GET_ONE_REG: {
3735 struct kvm_one_reg reg;
3737 if (copy_from_user(&reg, argp, sizeof(reg)))
3739 if (ioctl == KVM_SET_ONE_REG)
3740 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3742 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3745 #ifdef CONFIG_KVM_S390_UCONTROL
3746 case KVM_S390_UCAS_MAP: {
3747 struct kvm_s390_ucas_mapping ucasmap;
3749 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3754 if (!kvm_is_ucontrol(vcpu->kvm)) {
3759 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3760 ucasmap.vcpu_addr, ucasmap.length);
3763 case KVM_S390_UCAS_UNMAP: {
3764 struct kvm_s390_ucas_mapping ucasmap;
3766 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3771 if (!kvm_is_ucontrol(vcpu->kvm)) {
3776 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3781 case KVM_S390_VCPU_FAULT: {
3782 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3785 case KVM_ENABLE_CAP:
3787 struct kvm_enable_cap cap;
3789 if (copy_from_user(&cap, argp, sizeof(cap)))
3791 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3794 case KVM_S390_MEM_OP: {
3795 struct kvm_s390_mem_op mem_op;
3797 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3798 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3803 case KVM_S390_SET_IRQ_STATE: {
3804 struct kvm_s390_irq_state irq_state;
3807 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3809 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3810 irq_state.len == 0 ||
3811 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3815 r = kvm_s390_set_irq_state(vcpu,
3816 (void __user *) irq_state.buf,
3820 case KVM_S390_GET_IRQ_STATE: {
3821 struct kvm_s390_irq_state irq_state;
3824 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3826 if (irq_state.len == 0) {
3830 r = kvm_s390_get_irq_state(vcpu,
3831 (__u8 __user *) irq_state.buf,
/*
 * Fault handler for mmap()ed vcpu file regions.
 *
 * With CONFIG_KVM_S390_UCONTROL, a fault at page offset
 * KVM_S390_SIE_PAGE_OFFSET on a ucontrol VM is resolved to the page backing
 * the vcpu's SIE block, and a reference is taken on that page. Every other
 * fault yields VM_FAULT_SIGBUS.
 * NOTE(review): the return inside the ucontrol branch and the #endif are
 * not visible in this listing.
 */
3841 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3843 #ifdef CONFIG_KVM_S390_UCONTROL
3844 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3845 && (kvm_is_ucontrol(vcpu->kvm))) {
3846 vmf->page = virt_to_page(vcpu->arch.sie_block);
3847 get_page(vmf->page);
3851 return VM_FAULT_SIGBUS;
/*
 * Arch hook invoked when a memory slot is created.
 * NOTE(review): the body is not visible in this listing.
 */
3854 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3855 unsigned long npages)
/*
 * kvm_arch_prepare_memory_region: validate a memory region before it is
 * committed. Three properties are tested: the low 20 bits of
 * mem->userspace_addr and of mem->memory_size (mask 0xfffff, i.e. 1MB
 * granularity as described in the comment in the body), and whether the end
 * of the guest physical range exceeds kvm->arch.mem_limit.
 * NOTE(review): the error returns for the three checks and the final return
 * are not visible in this listing.
 */
3860 /* Section: memory related */
3861 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3862 struct kvm_memory_slot *memslot,
3863 const struct kvm_userspace_memory_region *mem,
3864 enum kvm_mr_change change)
3866 /* A few sanity checks. We can have memory slots which have to be
3867 located/ended at a segment boundary (1MB). The memory in userland is
3868 ok to be fragmented into various different vmas. It is okay to mmap()
3869 and munmap() stuff in this slot after doing this call at any time */
3871 if (mem->userspace_addr & 0xffffful)
3874 if (mem->memory_size & 0xffffful)
3877 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
/*
 * Commit a memory region change to the VM's gmap.
 *
 * The old slot is compared with the new region (user address, guest
 * physical base and size); the comment in the body explains why an
 * unchanged slot should not trigger a gmap update. Otherwise the region is
 * mapped with gmap_map_segment(), and a failure is reported with pr_warn().
 * NOTE(review): the early return for an unchanged slot and the test of the
 * gmap_map_segment() result are not visible in this listing.
 */
3883 void kvm_arch_commit_memory_region(struct kvm *kvm,
3884 const struct kvm_userspace_memory_region *mem,
3885 const struct kvm_memory_slot *old,
3886 const struct kvm_memory_slot *new,
3887 enum kvm_mr_change change)
3891 /* If the basics of the memslot do not change, we do not want
3892 * to update the gmap. Every update causes several unnecessary
3893 * segment translation exceptions. This is usually handled just
3894 * fine by the normal fault handler + gmap, but it will also
3895 * cause faults on the prefix page of running guest CPUs.
3897 if (old->userspace_addr == mem->userspace_addr &&
3898 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3899 old->npages * PAGE_SIZE == mem->memory_size)
3902 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3903 mem->guest_phys_addr, mem->memory_size);
3905 pr_warn("failed to commit memory region\n");
/*
 * Compute the facility mask applied to stfle_fac_list[i] in kvm_s390_init().
 *
 * A two-bit value is taken from sclp.hmfai for index @i (shift left by
 * 2 * i, then right by 30 - this assumes the shifted value is 32 bits wide,
 * TODO confirm). The result is 0x0000ffffffffffff shifted right by 16 bits
 * for every unit of that value.
 */
3909 static inline unsigned long nonhyp_mask(int i)
3911 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3913 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
/* Arch hook run when a vcpu stops blocking: reset vcpu->valid_wakeup. */
3916 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3918 vcpu->valid_wakeup = false;
/*
 * Module init.
 *
 * Logs "SIE not available" when sclp.has_sief2 is not set. Otherwise the
 * first 16 entries of kvm_s390_fac_list_mask are extended with the host
 * facility bits from the lowcore stfle_fac_list, filtered through
 * nonhyp_mask(), and the module registers with KVM common code via
 * kvm_init(); the kvm_init() result is returned.
 * NOTE(review): the error return taken when SIE is unavailable is not
 * visible in this listing.
 */
3921 static int __init kvm_s390_init(void)
3925 if (!sclp.has_sief2) {
3926 pr_info("SIE not available\n");
3930 for (i = 0; i < 16; i++)
3931 kvm_s390_fac_list_mask[i] |=
3932 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3934 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
/*
 * Module exit hook, followed by the registration of the init and exit
 * hooks with module_init() / module_exit().
 * NOTE(review): the body of kvm_s390_exit() is not visible in this listing.
 */
3937 static void __exit kvm_s390_exit(void)
3942 module_init(kvm_s390_init);
3943 module_exit(kvm_s390_exit);
/*
 * Module aliases: one derived from the KVM misc device minor (KVM_MINOR)
 * and one for the device name "kvm". <linux/miscdevice.h> is included here
 * for MODULE_ALIAS_MISCDEV.
 */
3946 * Enable autoloading of the kvm module.
3947 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3948 * since x86 takes a different approach.
3950 #include <linux/miscdevice.h>
3951 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3952 MODULE_ALIAS("devname:kvm");