// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
38 #include <asm/pgtable.h>
41 #include <asm/switch_to.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
50 #define CREATE_TRACE_POINTS
52 #include "trace-s390.h"
54 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 (KVM_MAX_VCPUS + LOCAL_IRQS))
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
/*
 * debugfs statistics table: each entry maps a debugfs file name to the
 * offset of a counter inside struct kvm_vcpu (VCPU_STAT) or struct kvm
 * (VM_STAT).  Counters cover exit reasons, interrupt delivery/injection,
 * intercepted instructions, SIGP orders and DIAGNOSE calls.
 * NOTE(review): the closing "};" of this array is not visible in this view.
 */
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63 { "userspace_handled", VCPU_STAT(exit_userspace) },
64 { "exit_null", VCPU_STAT(exit_null) },
65 { "exit_validity", VCPU_STAT(exit_validity) },
66 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 { "exit_external_request", VCPU_STAT(exit_external_request) },
68 { "exit_io_request", VCPU_STAT(exit_io_request) },
69 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 { "exit_instruction", VCPU_STAT(exit_instruction) },
71 { "exit_pei", VCPU_STAT(exit_pei) },
72 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
79 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
81 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
82 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
83 { "deliver_ckc", VCPU_STAT(deliver_ckc) },
84 { "deliver_cputm", VCPU_STAT(deliver_cputm) },
85 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
87 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88 { "deliver_virtio", VCPU_STAT(deliver_virtio) },
89 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92 { "deliver_program", VCPU_STAT(deliver_program) },
93 { "deliver_io", VCPU_STAT(deliver_io) },
94 { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
96 { "inject_ckc", VCPU_STAT(inject_ckc) },
97 { "inject_cputm", VCPU_STAT(inject_cputm) },
98 { "inject_external_call", VCPU_STAT(inject_external_call) },
99 { "inject_float_mchk", VM_STAT(inject_float_mchk) },
100 { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101 { "inject_io", VM_STAT(inject_io) },
102 { "inject_mchk", VCPU_STAT(inject_mchk) },
103 { "inject_pfault_done", VM_STAT(inject_pfault_done) },
104 { "inject_program", VCPU_STAT(inject_program) },
105 { "inject_restart", VCPU_STAT(inject_restart) },
106 { "inject_service_signal", VM_STAT(inject_service_signal) },
107 { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108 { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109 { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110 { "inject_virtio", VM_STAT(inject_virtio) },
111 { "instruction_epsw", VCPU_STAT(instruction_epsw) },
112 { "instruction_gs", VCPU_STAT(instruction_gs) },
113 { "instruction_io_other", VCPU_STAT(instruction_io_other) },
114 { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115 { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117 { "instruction_ptff", VCPU_STAT(instruction_ptff) },
118 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
119 { "instruction_sck", VCPU_STAT(instruction_sck) },
120 { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121 { "instruction_spx", VCPU_STAT(instruction_spx) },
122 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
123 { "instruction_stap", VCPU_STAT(instruction_stap) },
124 { "instruction_iske", VCPU_STAT(instruction_iske) },
125 { "instruction_ri", VCPU_STAT(instruction_ri) },
126 { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127 { "instruction_sske", VCPU_STAT(instruction_sske) },
128 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129 { "instruction_essa", VCPU_STAT(instruction_essa) },
130 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
131 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
132 { "instruction_tb", VCPU_STAT(instruction_tb) },
133 { "instruction_tpi", VCPU_STAT(instruction_tpi) },
134 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
135 { "instruction_tsch", VCPU_STAT(instruction_tsch) },
136 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137 { "instruction_sie", VCPU_STAT(instruction_sie) },
138 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154 { "instruction_diag_10", VCPU_STAT(diagnose_10) },
155 { "instruction_diag_44", VCPU_STAT(diagnose_44) },
156 { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157 { "instruction_diag_258", VCPU_STAT(diagnose_258) },
158 { "instruction_diag_308", VCPU_STAT(diagnose_308) },
159 { "instruction_diag_500", VCPU_STAT(diagnose_500) },
160 { "instruction_diag_other", VCPU_STAT(diagnose_other) },
/*
 * Host TOD clock in its extended (multiple-epoch) representation.
 * Users below access .tod and .epoch_idx (see kvm_s390_get_tod_clock).
 * NOTE(review): the struct body is not visible in this view.
 */
164 struct kvm_s390_tod_clock_ext {
170 /* allow nested virtualization in KVM (if enabled by user space) */
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
175 /* allow 1m huge page guest backing, if !nested */
177 module_param(hpage, int, 0444);
178 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181 * For now we handle at most 16 double words as this is what the s390 base
182 * kernel handles and stores in the prefix page. If we ever need to go beyond
183 * this, this requires changes to code, but the external uapi can stay.
185 #define SIZE_INTERNAL 16
188 * Base feature mask that defines default mask for facilities. Consists of the
189 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
191 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
193 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
194 * and defines the facilities that can be enabled via a cpu model.
196 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
/*
 * Return the size (in unsigned longs) of the internal facility arrays.
 * The BUILD_BUG_ONs guarantee at compile time that SIZE_INTERNAL never
 * exceeds the architectural mask/list sizes nor the lowcore stfle buffer.
 */
198 static unsigned long kvm_s390_fac_size(void)
200 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
201 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
202 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
203 sizeof(S390_lowcore.stfle_fac_list));
205 return SIZE_INTERNAL;
208 /* available cpu features supported by kvm */
209 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
210 /* available subfunctions indicated via query / "test bit" */
211 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
/* gmap pte-invalidation notifiers, registered in kvm_arch_hardware_setup() */
213 static struct gmap_notifier gmap_notifier;
214 static struct gmap_notifier vsie_gmap_notifier;
/* KVM's s390 debug feature log, created in kvm_arch_init() */
215 debug_info_t *kvm_s390_dbf;
217 /* Section: not file related */
/* No hardware enable sequence is needed on s390 (see comment below). */
218 int kvm_arch_hardware_enable(void)
220 /* every s390 is virtualization enabled ;-) */
224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
/*
 * Compensate a host TOD clock jump of @delta in one SIE control block so
 * the guest-visible TOD stays constant; with the multiple-epoch facility
 * (ECD_MEF) the epoch index (epdx) is adjusted as well.
 */
227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
232 * The TOD jumps by delta, we have to compensate this by adding
233 * -delta to the epoch.
237 /* sign-extension - we're adding to signed values below */
242 if (scb->ecd & ECD_MEF) {
243 scb->epdx += delta_idx;
/* carry into the epoch index on epoch wrap-around */
244 if (scb->epoch < delta)
250 * This callback is executed during stop_machine(). All CPUs are therefore
251 * temporarily stopped. In order not to change guest behavior, we have to
252 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
253 * so a CPU won't be stopped while calculating with the epoch.
/*
 * STP/clock-steering notifier: propagate a host TOD delta (*v) to every
 * VCPU's SIE block, the per-VM epoch copy, any running cpu timer, and the
 * vSIE shadow block if one exists.
 */
255 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
259 struct kvm_vcpu *vcpu;
261 unsigned long long *delta = v;
263 list_for_each_entry(kvm, &vm_list, vm_list) {
264 kvm_for_each_vcpu(i, vcpu, kvm) {
265 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
/* keep the VM-wide epoch in sync with (any) vcpu's epoch */
267 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
268 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
270 if (vcpu->arch.cputm_enabled)
271 vcpu->arch.cputm_start += *delta;
272 if (vcpu->arch.vsie_block)
273 kvm_clock_sync_scb(vcpu->arch.vsie_block,
280 static struct notifier_block kvm_clock_notifier = {
281 .notifier_call = kvm_clock_sync,
/*
 * Register the gmap pte notifiers (normal and vSIE) and hook the TOD
 * epoch-delta notifier chain so guest clocks follow host clock steering.
 */
284 int kvm_arch_hardware_setup(void)
286 gmap_notifier.notifier_call = kvm_gmap_notifier;
287 gmap_register_pte_notifier(&gmap_notifier);
288 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
289 gmap_register_pte_notifier(&vsie_gmap_notifier);
290 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
291 &kvm_clock_notifier);
/* Undo everything kvm_arch_hardware_setup() registered. */
295 void kvm_arch_hardware_unsetup(void)
297 gmap_unregister_pte_notifier(&gmap_notifier);
298 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
299 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
300 &kvm_clock_notifier);
/* Mark cpu feature @nr as available to guests (inverse bit numbering). */
303 static void allow_cpu_feat(unsigned long nr)
305 set_bit_inv(nr, kvm_s390_available_cpu_feat);
/*
 * Query availability of a single PERFORM LOCKED OPERATION function code
 * via its "test bit" mode (0x100 set in r0) — presumably returns nonzero
 * if the function code is installed; body not fully visible here.
 */
308 static inline int plo_test_bit(unsigned char nr)
310 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
314 /* Parameter registers are ignored for "test bit" */
/*
 * Probe host capabilities once at init time and record what KVM may offer
 * to guests: PLO function codes, PTFF (TOD-clock steering) subfunctions,
 * CPACF crypto subfunctions (queried per MSA level via test_facility),
 * and the cpu feature bits in kvm_s390_available_cpu_feat.  Features that
 * require vSIE prerequisites are only allowed when sief2/ESOP/64bscao/
 * IDTE are present and the "nested" module parameter is set.
 */
324 static void kvm_s390_cpu_feat_init(void)
/* collect the installed PLO function codes into a bit mask */
328 for (i = 0; i < 256; ++i) {
330 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
333 if (test_facility(28)) /* TOD-clock steering */
334 ptff(kvm_s390_available_subfunc.ptff,
335 sizeof(kvm_s390_available_subfunc.ptff),
338 if (test_facility(17)) { /* MSA */
339 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
340 kvm_s390_available_subfunc.kmac);
341 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
342 kvm_s390_available_subfunc.kmc);
343 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
344 kvm_s390_available_subfunc.km);
345 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
346 kvm_s390_available_subfunc.kimd);
347 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
348 kvm_s390_available_subfunc.klmd);
350 if (test_facility(76)) /* MSA3 */
351 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
352 kvm_s390_available_subfunc.pckmo);
353 if (test_facility(77)) { /* MSA4 */
354 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
355 kvm_s390_available_subfunc.kmctr);
356 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
357 kvm_s390_available_subfunc.kmf);
358 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
359 kvm_s390_available_subfunc.kmo);
360 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
361 kvm_s390_available_subfunc.pcc);
363 if (test_facility(57)) /* MSA5 */
364 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
365 kvm_s390_available_subfunc.ppno);
367 if (test_facility(146)) /* MSA8 */
368 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
369 kvm_s390_available_subfunc.kma);
371 if (MACHINE_HAS_ESOP)
372 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
374 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
375 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
/* bail out of the vSIE feature section if nesting is impossible/disabled */
377 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
378 !test_facility(3) || !nested)
380 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
381 if (sclp.has_64bscao)
382 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
384 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
386 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
388 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
390 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
392 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
394 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
396 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
398 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
399 * all skey handling functions read/set the skey from the PGSTE
400 * instead of the real storage key.
402 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
403 * pages being detected as preserved although they are resident.
405 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
406 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
408 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
409 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
410 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
412 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
413 * cannot easily shadow the SCA because of the ipte lock.
/*
 * Module init hook: create the "kvm-trace" s390 debug feature log, probe
 * host cpu features, and register the FLIC (floating interrupt controller)
 * device ops.  On any failure after debug_register(), the debug log is
 * torn down again via the out_debug_unreg path.
 */
417 int kvm_arch_init(void *opaque)
421 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
425 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
427 goto out_debug_unreg;
430 kvm_s390_cpu_feat_init();
432 /* Register floating interrupt controller interface. */
433 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
435 pr_err("Failed to register FLIC rc=%d\n", rc);
436 goto out_debug_unreg;
441 debug_unregister(kvm_s390_dbf);
/* Module exit hook: release the debug feature log. */
445 void kvm_arch_exit(void)
447 debug_unregister(kvm_s390_dbf);
450 /* Section: device related */
/* /dev/kvm ioctl: only KVM_S390_ENABLE_SIE is handled here. */
451 long kvm_arch_dev_ioctl(struct file *filp,
452 unsigned int ioctl, unsigned long arg)
454 if (ioctl == KVM_S390_ENABLE_SIE)
455 return s390_enable_sie();
/*
 * KVM_CHECK_EXTENSION handler: report whether a capability is supported,
 * and for countable capabilities (vcpus, memslots, mem-op size) the limit.
 * Facility-dependent capabilities (RI, GS, BPB) forward test_facility().
 */
459 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
464 case KVM_CAP_S390_PSW:
465 case KVM_CAP_S390_GMAP:
466 case KVM_CAP_SYNC_MMU:
467 #ifdef CONFIG_KVM_S390_UCONTROL
468 case KVM_CAP_S390_UCONTROL:
470 case KVM_CAP_ASYNC_PF:
471 case KVM_CAP_SYNC_REGS:
472 case KVM_CAP_ONE_REG:
473 case KVM_CAP_ENABLE_CAP:
474 case KVM_CAP_S390_CSS_SUPPORT:
475 case KVM_CAP_IOEVENTFD:
476 case KVM_CAP_DEVICE_CTRL:
477 case KVM_CAP_S390_IRQCHIP:
478 case KVM_CAP_VM_ATTRIBUTES:
479 case KVM_CAP_MP_STATE:
480 case KVM_CAP_IMMEDIATE_EXIT:
481 case KVM_CAP_S390_INJECT_IRQ:
482 case KVM_CAP_S390_USER_SIGP:
483 case KVM_CAP_S390_USER_STSI:
484 case KVM_CAP_S390_SKEYS:
485 case KVM_CAP_S390_IRQ_STATE:
486 case KVM_CAP_S390_USER_INSTR0:
487 case KVM_CAP_S390_CMMA_MIGRATION:
488 case KVM_CAP_S390_AIS:
489 case KVM_CAP_S390_AIS_MIGRATION:
492 case KVM_CAP_S390_HPAGE_1M:
/* 1M huge pages require the hpage module parameter and no ucontrol VM */
494 if (hpage && !kvm_is_ucontrol(kvm))
497 case KVM_CAP_S390_MEM_OP:
/* vcpu limit depends on SCA format: extended SCA allows more entries */
500 case KVM_CAP_NR_VCPUS:
501 case KVM_CAP_MAX_VCPUS:
502 r = KVM_S390_BSCA_CPU_SLOTS;
503 if (!kvm_s390_use_sca_entries())
505 else if (sclp.has_esca && sclp.has_64bscao)
506 r = KVM_S390_ESCA_CPU_SLOTS;
508 case KVM_CAP_NR_MEMSLOTS:
509 r = KVM_USER_MEM_SLOTS;
511 case KVM_CAP_S390_COW:
512 r = MACHINE_HAS_ESOP;
514 case KVM_CAP_S390_VECTOR_REGISTERS:
517 case KVM_CAP_S390_RI:
518 r = test_facility(64);
520 case KVM_CAP_S390_GS:
521 r = test_facility(133);
523 case KVM_CAP_S390_BPB:
524 r = test_facility(82);
/*
 * Walk a memslot segment-wise (_PAGE_ENTRIES pages at a time), pull the
 * hardware/gmap dirty state via gmap_sync_dirty_log_pmd() and transfer it
 * into KVM's dirty bitmap with mark_page_dirty().  Aborts early on a
 * fatal signal so a killed task does not keep scanning.
 */
532 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
533 struct kvm_memory_slot *memslot)
536 gfn_t cur_gfn, last_gfn;
537 unsigned long gaddr, vmaddr;
538 struct gmap *gmap = kvm->arch.gmap;
539 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
541 /* Loop over all guest segments */
542 cur_gfn = memslot->base_gfn;
543 last_gfn = memslot->base_gfn + memslot->npages;
544 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
545 gaddr = gfn_to_gpa(cur_gfn);
546 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
547 if (kvm_is_error_hva(vmaddr))
550 bitmap_zero(bitmap, _PAGE_ENTRIES);
551 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
552 for (i = 0; i < _PAGE_ENTRIES; i++) {
553 if (test_bit(i, bitmap))
554 mark_page_dirty(kvm, cur_gfn + i);
557 if (fatal_signal_pending(current))
563 /* Section: vm related */
564 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
567 * Get (and clear) the dirty memory log for a memory slot.
/*
 * KVM_GET_DIRTY_LOG handler: not supported for ucontrol VMs.  Syncs the
 * gmap dirty state into the slot's bitmap, copies it to user space and
 * clears it, all under kvm->slots_lock.
 */
569 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
570 struct kvm_dirty_log *log)
574 struct kvm_memslots *slots;
575 struct kvm_memory_slot *memslot;
578 if (kvm_is_ucontrol(kvm))
581 mutex_lock(&kvm->slots_lock);
584 if (log->slot >= KVM_USER_MEM_SLOTS)
587 slots = kvm_memslots(kvm);
588 memslot = id_to_memslot(slots, log->slot);
590 if (!memslot->dirty_bitmap)
593 kvm_s390_sync_dirty_log(kvm, memslot);
594 r = kvm_get_dirty_log(kvm, log, &is_dirty);
598 /* Clear the dirty log */
600 n = kvm_dirty_bitmap_bytes(memslot);
601 memset(memslot->dirty_bitmap, 0, n);
605 mutex_unlock(&kvm->slots_lock);
/* Request an operation-exception intercept on every vcpu of @kvm. */
609 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
612 struct kvm_vcpu *vcpu;
614 kvm_for_each_vcpu(i, vcpu, kvm) {
615 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
/*
 * KVM_ENABLE_CAP handler.  Facility-backed capabilities (VX, RI, AIS, GS)
 * may only be enabled before the first vcpu is created (checked under
 * kvm->lock via kvm->created_vcpus); enabling them sets the corresponding
 * bits in both the facility mask and list of the cpu model.
 */
619 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
627 case KVM_CAP_S390_IRQCHIP:
628 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
629 kvm->arch.use_irqchip = 1;
632 case KVM_CAP_S390_USER_SIGP:
633 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
634 kvm->arch.user_sigp = 1;
637 case KVM_CAP_S390_VECTOR_REGISTERS:
638 mutex_lock(&kvm->lock);
639 if (kvm->created_vcpus) {
641 } else if (MACHINE_HAS_VX) {
642 set_kvm_facility(kvm->arch.model.fac_mask, 129);
643 set_kvm_facility(kvm->arch.model.fac_list, 129);
/* facility 134 (vector enhancements) only together with vectors */
644 if (test_facility(134)) {
645 set_kvm_facility(kvm->arch.model.fac_mask, 134);
646 set_kvm_facility(kvm->arch.model.fac_list, 134);
648 if (test_facility(135)) {
649 set_kvm_facility(kvm->arch.model.fac_mask, 135);
650 set_kvm_facility(kvm->arch.model.fac_list, 135);
655 mutex_unlock(&kvm->lock);
656 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
657 r ? "(not available)" : "(success)");
659 case KVM_CAP_S390_RI:
661 mutex_lock(&kvm->lock);
662 if (kvm->created_vcpus) {
664 } else if (test_facility(64)) {
665 set_kvm_facility(kvm->arch.model.fac_mask, 64);
666 set_kvm_facility(kvm->arch.model.fac_list, 64);
669 mutex_unlock(&kvm->lock);
670 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
671 r ? "(not available)" : "(success)");
673 case KVM_CAP_S390_AIS:
674 mutex_lock(&kvm->lock);
675 if (kvm->created_vcpus) {
678 set_kvm_facility(kvm->arch.model.fac_mask, 72);
679 set_kvm_facility(kvm->arch.model.fac_list, 72);
682 mutex_unlock(&kvm->lock);
683 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
684 r ? "(not available)" : "(success)");
686 case KVM_CAP_S390_GS:
688 mutex_lock(&kvm->lock);
689 if (kvm->created_vcpus) {
691 } else if (test_facility(133)) {
692 set_kvm_facility(kvm->arch.model.fac_mask, 133);
693 set_kvm_facility(kvm->arch.model.fac_list, 133);
696 mutex_unlock(&kvm->lock);
697 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
698 r ? "(not available)" : "(success)");
700 case KVM_CAP_S390_HPAGE_1M:
701 mutex_lock(&kvm->lock);
702 if (kvm->created_vcpus)
/* huge pages are incompatible with CMMA and with ucontrol VMs */
704 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
708 down_write(&kvm->mm->mmap_sem);
709 kvm->mm->context.allow_gmap_hpage_1m = 1;
710 up_write(&kvm->mm->mmap_sem);
712 * We might have to create fake 4k page
713 * tables. To avoid that the hardware works on
714 * stale PGSTEs, we emulate these instructions.
716 kvm->arch.use_skf = 0;
717 kvm->arch.use_pfmfi = 0;
719 mutex_unlock(&kvm->lock);
720 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
721 r ? "(not available)" : "(success)");
723 case KVM_CAP_S390_USER_STSI:
724 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
725 kvm->arch.user_stsi = 1;
728 case KVM_CAP_S390_USER_INSTR0:
729 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
730 kvm->arch.user_instr0 = 1;
731 icpt_operexc_on_all_vcpus(kvm);
/* Read a KVM_S390_VM_MEM_CTRL attribute; only the memory limit is visible here. */
741 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
745 switch (attr->attr) {
746 case KVM_S390_VM_MEM_LIMIT_SIZE:
748 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
749 kvm->arch.mem_limit);
750 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
/*
 * Write a KVM_S390_VM_MEM_CTRL attribute: enable CMMA (only before vcpu
 * creation and only without 1M huge pages), reset CMMA state, or change
 * the guest memory limit by replacing the gmap (vcpu-less VMs only).
 */
760 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
764 switch (attr->attr) {
765 case KVM_S390_VM_MEM_ENABLE_CMMA:
770 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
771 mutex_lock(&kvm->lock);
772 if (kvm->created_vcpus)
774 else if (kvm->mm->context.allow_gmap_hpage_1m)
777 kvm->arch.use_cmma = 1;
778 /* Not compatible with cmma. */
779 kvm->arch.use_pfmfi = 0;
782 mutex_unlock(&kvm->lock);
784 case KVM_S390_VM_MEM_CLR_CMMA:
789 if (!kvm->arch.use_cmma)
792 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states")
793 mutex_lock(&kvm->lock);
794 idx = srcu_read_lock(&kvm->srcu);
795 s390_reset_cmma(kvm->arch.gmap->mm);
796 srcu_read_unlock(&kvm->srcu, idx);
797 mutex_unlock(&kvm->lock);
800 case KVM_S390_VM_MEM_LIMIT_SIZE: {
801 unsigned long new_limit;
803 if (kvm_is_ucontrol(kvm))
806 if (get_user(new_limit, (u64 __user *)attr->addr))
/* a new limit may only tighten an existing one, never widen it */
809 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
810 new_limit > kvm->arch.mem_limit)
816 /* gmap_create takes last usable address */
817 if (new_limit != KVM_S390_NO_MEM_LIMIT)
821 mutex_lock(&kvm->lock);
822 if (!kvm->created_vcpus) {
823 /* gmap_create will round the limit up */
824 struct gmap *new = gmap_create(current->mm, new_limit);
829 gmap_remove(kvm->arch.gmap);
831 kvm->arch.gmap = new;
835 mutex_unlock(&kvm->lock);
836 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
837 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
838 (void *) kvm->arch.gmap->asce);
848 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
/*
 * Re-apply the VM crypto configuration on every vcpu.  All vcpus are
 * blocked first so the SIE blocks can be changed safely, and a
 * VSIE_RESTART request forces shadow crycb re-creation for nested guests.
 */
850 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
852 struct kvm_vcpu *vcpu;
855 kvm_s390_vcpu_block_all(kvm);
857 kvm_for_each_vcpu(i, vcpu, kvm) {
858 kvm_s390_vcpu_crypto_setup(vcpu);
859 /* recreate the shadow crycb by leaving the VSIE handler */
860 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
863 kvm_s390_vcpu_unblock_all(kvm);
/*
 * Write a KVM_S390_VM_CRYPTO attribute under kvm->lock: toggle AES/DEA
 * key wrapping (requires facility 76 / MSA3; enabling generates fresh
 * wrapping key masks, disabling zeroes them) or toggle APIE (requires AP
 * instructions).  Finishes by resetting the crypto setup on all vcpus.
 */
866 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
868 mutex_lock(&kvm->lock);
869 switch (attr->attr) {
870 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
871 if (!test_kvm_facility(kvm, 76)) {
872 mutex_unlock(&kvm->lock);
876 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
877 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
878 kvm->arch.crypto.aes_kw = 1;
879 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
881 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
882 if (!test_kvm_facility(kvm, 76)) {
883 mutex_unlock(&kvm->lock);
887 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
888 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
889 kvm->arch.crypto.dea_kw = 1;
890 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
892 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
893 if (!test_kvm_facility(kvm, 76)) {
894 mutex_unlock(&kvm->lock);
897 kvm->arch.crypto.aes_kw = 0;
898 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
899 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
900 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
902 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
903 if (!test_kvm_facility(kvm, 76)) {
904 mutex_unlock(&kvm->lock);
907 kvm->arch.crypto.dea_kw = 0;
908 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
909 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
910 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
912 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
913 if (!ap_instructions_available()) {
914 mutex_unlock(&kvm->lock);
917 kvm->arch.crypto.apie = 1;
919 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
920 if (!ap_instructions_available()) {
921 mutex_unlock(&kvm->lock);
924 kvm->arch.crypto.apie = 0;
927 mutex_unlock(&kvm->lock);
931 kvm_s390_vcpu_crypto_reset_all(kvm);
932 mutex_unlock(&kvm->lock);
/* Queue the synchronous request @req on every vcpu of @kvm. */
936 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
939 struct kvm_vcpu *vcpu;
941 kvm_for_each_vcpu(cx, vcpu, kvm)
942 kvm_s390_sync_request(req, vcpu);
946 * Must be called with kvm->srcu held to avoid races on memslots, and with
947 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
/*
 * Enter migration mode.  Without CMMA only the flag is set; with CMMA all
 * pages of every used memslot are marked dirty in the (otherwise unused)
 * second bitmap half and the total is stored in cmma_dirty_pages, then all
 * vcpus are told to start migration.
 */
949 static int kvm_s390_vm_start_migration(struct kvm *kvm)
951 struct kvm_memory_slot *ms;
952 struct kvm_memslots *slots;
953 unsigned long ram_pages = 0;
956 /* migration mode already enabled */
957 if (kvm->arch.migration_mode)
959 slots = kvm_memslots(kvm);
960 if (!slots || !slots->used_slots)
963 if (!kvm->arch.use_cmma) {
964 kvm->arch.migration_mode = 1;
967 /* mark all the pages in active slots as dirty */
968 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
969 ms = slots->memslots + slotnr;
971 * The second half of the bitmap is only used on x86,
972 * and would be wasted otherwise, so we put it to good
973 * use here to keep track of the state of the storage
976 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
977 ram_pages += ms->npages;
979 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
980 kvm->arch.migration_mode = 1;
981 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
986 * Must be called with kvm->slots_lock to avoid races with ourselves and
987 * kvm_s390_vm_start_migration.
/* Leave migration mode and, when CMMA is in use, notify all vcpus. */
989 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
991 /* migration mode already disabled */
992 if (!kvm->arch.migration_mode)
994 kvm->arch.migration_mode = 0;
995 if (kvm->arch.use_cmma)
996 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
/* Attribute dispatcher for KVM_S390_VM_MIGRATION start/stop, under slots_lock. */
1000 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1001 struct kvm_device_attr *attr)
1005 mutex_lock(&kvm->slots_lock);
1006 switch (attr->attr) {
1007 case KVM_S390_VM_MIGRATION_START:
1008 res = kvm_s390_vm_start_migration(kvm);
1010 case KVM_S390_VM_MIGRATION_STOP:
1011 res = kvm_s390_vm_stop_migration(kvm);
1016 mutex_unlock(&kvm->slots_lock);
/* Report the current migration-mode flag to user space. */
1021 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1022 struct kvm_device_attr *attr)
1024 u64 mig = kvm->arch.migration_mode;
1026 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1029 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
/*
 * Set the guest TOD clock including the epoch index (requires facility
 * 139, the multiple-epoch facility, if epoch_idx is non-zero).
 * NOTE(review): several argument lines appear mangled by extraction
 * ("&gtod" rendered as ">od") — kept byte-identical here.
 */
1034 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1036 struct kvm_s390_vm_tod_clock gtod;
1038 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
1041 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1043 kvm_s390_set_tod_clock(kvm, >od);
1045 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1046 gtod.epoch_idx, gtod.tod);
/* Set only the high (extension) part of the guest TOD clock. */
1051 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1055 if (copy_from_user(>od_high, (void __user *)attr->addr,
1061 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
/* Set only the low (base) part of the guest TOD clock; epoch index stays 0. */
1066 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1068 struct kvm_s390_vm_tod_clock gtod = { 0 };
1070 if (copy_from_user(>od.tod, (void __user *)attr->addr,
1074 kvm_s390_set_tod_clock(kvm, >od);
1075 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
/* Attribute dispatcher for the KVM_S390_VM_TOD set operations. */
1079 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1086 switch (attr->attr) {
1087 case KVM_S390_VM_TOD_EXT:
1088 ret = kvm_s390_set_tod_ext(kvm, attr);
1090 case KVM_S390_VM_TOD_HIGH:
1091 ret = kvm_s390_set_tod_high(kvm, attr);
1093 case KVM_S390_VM_TOD_LOW:
1094 ret = kvm_s390_set_tod_low(kvm, attr);
/*
 * Compute the guest TOD clock: host TOD plus the VM's epoch, and with the
 * multiple-epoch facility (139) also the summed epoch index, carrying one
 * when the 64-bit TOD addition wrapped.
 */
1103 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1104 struct kvm_s390_vm_tod_clock *gtod)
1106 struct kvm_s390_tod_clock_ext htod;
1110 get_tod_clock_ext((char *)&htod);
1112 gtod->tod = htod.tod + kvm->arch.epoch;
1113 gtod->epoch_idx = 0;
1114 if (test_kvm_facility(kvm, 139)) {
1115 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1116 if (gtod->tod < htod.tod)
1117 gtod->epoch_idx += 1;
/* Copy the full (extended) guest TOD clock to user space. */
1123 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1125 struct kvm_s390_vm_tod_clock gtod;
1127 memset(>od, 0, sizeof(gtod));
1128 kvm_s390_get_tod_clock(kvm, >od);
1129 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
1132 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1133 gtod.epoch_idx, gtod.tod);
/* Report only the high (extension) part of the guest TOD clock. */
1137 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1141 if (copy_to_user((void __user *)attr->addr, >od_high,
1144 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
/* Report only the low (base) part of the guest TOD clock. */
1149 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1153 gtod = kvm_s390_get_tod_clock_fast(kvm);
1154 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
1156 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
/* Attribute dispatcher for the KVM_S390_VM_TOD get operations. */
1161 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1168 switch (attr->attr) {
1169 case KVM_S390_VM_TOD_EXT:
1170 ret = kvm_s390_get_tod_ext(kvm, attr);
1172 case KVM_S390_VM_TOD_HIGH:
1173 ret = kvm_s390_get_tod_high(kvm, attr);
1175 case KVM_S390_VM_TOD_LOW:
1176 ret = kvm_s390_get_tod_low(kvm, attr);
1185 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1187 struct kvm_s390_vm_cpu_processor *proc;
1188 u16 lowest_ibc, unblocked_ibc;
1191 mutex_lock(&kvm->lock);
1192 if (kvm->created_vcpus) {
1196 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1201 if (!copy_from_user(proc, (void __user *)attr->addr,
1203 kvm->arch.model.cpuid = proc->cpuid;
1204 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1205 unblocked_ibc = sclp.ibc & 0xfff;
1206 if (lowest_ibc && proc->ibc) {
1207 if (proc->ibc > unblocked_ibc)
1208 kvm->arch.model.ibc = unblocked_ibc;
1209 else if (proc->ibc < lowest_ibc)
1210 kvm->arch.model.ibc = lowest_ibc;
1212 kvm->arch.model.ibc = proc->ibc;
1214 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1215 S390_ARCH_FAC_LIST_SIZE_BYTE);
1216 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1217 kvm->arch.model.ibc,
1218 kvm->arch.model.cpuid);
1219 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1220 kvm->arch.model.fac_list[0],
1221 kvm->arch.model.fac_list[1],
1222 kvm->arch.model.fac_list[2]);
1227 mutex_unlock(&kvm->lock);
/*
 * Set the guest CPU-feature bitmap. The requested features must be a subset
 * of what the host offers (kvm_s390_available_cpu_feat) and no VCPU may
 * exist yet; the copy into kvm->arch.cpu_feat is done under kvm->lock.
 * NOTE(review): return statements appear elided in this extract.
 */
1231 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1232 				       struct kvm_device_attr *attr)
1234 	struct kvm_s390_vm_cpu_feat data;
1236 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1238 	if (!bitmap_subset((unsigned long *) data.feat,
1239 			   kvm_s390_available_cpu_feat,
1240 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1243 	mutex_lock(&kvm->lock);
1244 	if (kvm->created_vcpus) {
1245 		mutex_unlock(&kvm->lock);
1248 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1249 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1250 	mutex_unlock(&kvm->lock);
1251 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/* Configuring CPU subfunctions is not supported yet; see comment below. */
1258 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1259 					  struct kvm_device_attr *attr)
1262 	 * Once supported by kernel + hw, we have to store the subfunctions
1263 	 * in kvm->arch and remember that user space configured them.
/*
 * KVM_SET_DEVICE_ATTR handler for the KVM_S390_VM_CPU_MODEL group:
 * dispatch to the processor / feature / subfunction setters.
 * NOTE(review): break/default/return lines appear elided in this extract.
 */
1268 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1272 	switch (attr->attr) {
1273 	case KVM_S390_VM_CPU_PROCESSOR:
1274 		ret = kvm_s390_set_processor(kvm, attr);
1276 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1277 		ret = kvm_s390_set_processor_feat(kvm, attr);
1279 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1280 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
/*
 * Copy the current guest CPU model (cpuid, ibc, facility list) to user
 * space via a temporary kzalloc'd buffer.
 * NOTE(review): allocation-failure and return paths appear elided here.
 */
1286 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1288 	struct kvm_s390_vm_cpu_processor *proc;
1291 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1296 	proc->cpuid = kvm->arch.model.cpuid;
1297 	proc->ibc = kvm->arch.model.ibc;
1298 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1299 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1300 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1301 		 kvm->arch.model.ibc,
1302 		 kvm->arch.model.cpuid);
1303 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1304 		 kvm->arch.model.fac_list[0],
1305 		 kvm->arch.model.fac_list[1],
1306 		 kvm->arch.model.fac_list[2]);
1307 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
/*
 * Report the host machine's CPU model to user space: host cpuid (via
 * get_cpu_id), raw SCLP ibc, KVM's facility mask and the host STFLE
 * facility list from the lowcore.
 * NOTE(review): allocation-failure and return paths appear elided here.
 */
1314 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1316 	struct kvm_s390_vm_cpu_machine *mach;
1319 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1324 	get_cpu_id((struct cpuid *) &mach->cpuid);
1325 	mach->ibc = sclp.ibc;
1326 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1327 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1328 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1329 	       sizeof(S390_lowcore.stfle_fac_list));
1330 	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1331 		 kvm->arch.model.ibc,
1332 		 kvm->arch.model.cpuid);
1333 	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1337 	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1341 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
/* Copy the currently configured guest CPU-feature bitmap to user space. */
1348 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1349 				       struct kvm_device_attr *attr)
1351 	struct kvm_s390_vm_cpu_feat data;
1353 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1354 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1355 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1357 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/* Copy the host-supported CPU-feature bitmap to user space. */
1364 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1365 				     struct kvm_device_attr *attr)
1367 	struct kvm_s390_vm_cpu_feat data;
1369 	bitmap_copy((unsigned long *) data.feat,
1370 		    kvm_s390_available_cpu_feat,
1371 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1372 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1374 	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * Subfunction getters: per-guest subfunction configuration is not
 * implemented yet (see comment), so only the machine-wide host
 * subfunction block can be reported to user space.
 */
1381 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1382 					  struct kvm_device_attr *attr)
1385 	 * Once we can actually configure subfunctions (kernel + hw support),
1386 	 * we have to check if they were already set by user space, if so copy
1387 	 * them from kvm->arch.
1392 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1393 					struct kvm_device_attr *attr)
1395 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1396 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
/*
 * KVM_GET_DEVICE_ATTR handler for the KVM_S390_VM_CPU_MODEL group:
 * dispatch to the processor/machine model, feature and subfunction getters.
 * NOTE(review): break/default/return lines appear elided in this extract.
 */
1400 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1404 	switch (attr->attr) {
1405 	case KVM_S390_VM_CPU_PROCESSOR:
1406 		ret = kvm_s390_get_processor(kvm, attr);
1408 	case KVM_S390_VM_CPU_MACHINE:
1409 		ret = kvm_s390_get_machine(kvm, attr);
1411 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1412 		ret = kvm_s390_get_processor_feat(kvm, attr);
1414 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1415 		ret = kvm_s390_get_machine_feat(kvm, attr);
1417 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1418 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1420 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1421 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
/*
 * Top-level KVM_SET_DEVICE_ATTR dispatcher: route by attr->group to the
 * memory-control, TOD, CPU-model, crypto or migration setter.
 * NOTE(review): break/default/return lines appear elided in this extract.
 */
1427 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1431 	switch (attr->group) {
1432 	case KVM_S390_VM_MEM_CTRL:
1433 		ret = kvm_s390_set_mem_control(kvm, attr);
1435 	case KVM_S390_VM_TOD:
1436 		ret = kvm_s390_set_tod(kvm, attr);
1438 	case KVM_S390_VM_CPU_MODEL:
1439 		ret = kvm_s390_set_cpu_model(kvm, attr);
1441 	case KVM_S390_VM_CRYPTO:
1442 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1444 	case KVM_S390_VM_MIGRATION:
1445 		ret = kvm_s390_vm_set_migration(kvm, attr);
/*
 * Top-level KVM_GET_DEVICE_ATTR dispatcher: route by attr->group.
 * NOTE(review): break/default/return lines appear elided in this extract.
 */
1455 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1459 	switch (attr->group) {
1460 	case KVM_S390_VM_MEM_CTRL:
1461 		ret = kvm_s390_get_mem_control(kvm, attr);
1463 	case KVM_S390_VM_TOD:
1464 		ret = kvm_s390_get_tod(kvm, attr);
1466 	case KVM_S390_VM_CPU_MODEL:
1467 		ret = kvm_s390_get_cpu_model(kvm, attr);
1469 	case KVM_S390_VM_MIGRATION:
1470 		ret = kvm_s390_vm_get_migration(kvm, attr);
/*
 * KVM_HAS_DEVICE_ATTR: report (0 / -ENXIO) whether a given group/attr pair
 * is supported, gated on host capabilities (sclp.has_cmma, AP instructions).
 * NOTE(review): several ret assignments, breaks and the default cases
 * appear elided in this extract.
 */
1480 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1484 	switch (attr->group) {
1485 	case KVM_S390_VM_MEM_CTRL:
1486 		switch (attr->attr) {
1487 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1488 		case KVM_S390_VM_MEM_CLR_CMMA:
1489 			ret = sclp.has_cmma ? 0 : -ENXIO;
1491 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1499 	case KVM_S390_VM_TOD:
1500 		switch (attr->attr) {
1501 		case KVM_S390_VM_TOD_LOW:
1502 		case KVM_S390_VM_TOD_HIGH:
1510 	case KVM_S390_VM_CPU_MODEL:
1511 		switch (attr->attr) {
1512 		case KVM_S390_VM_CPU_PROCESSOR:
1513 		case KVM_S390_VM_CPU_MACHINE:
1514 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1515 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1516 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1519 		/* configuring subfunctions is not supported yet */
1520 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1526 	case KVM_S390_VM_CRYPTO:
1527 		switch (attr->attr) {
1528 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1529 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1530 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1531 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1534 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1535 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1536 			ret = ap_instructions_available() ? 0 : -ENXIO;
1543 	case KVM_S390_VM_MIGRATION:
1554 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1558 int srcu_idx, i, r = 0;
1560 if (args->flags != 0)
1563 /* Is this guest using storage keys? */
1564 if (!mm_uses_skeys(current->mm))
1565 return KVM_S390_GET_SKEYS_NONE;
1567 /* Enforce sane limit on memory allocation */
1568 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1571 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1575 down_read(¤t->mm->mmap_sem);
1576 srcu_idx = srcu_read_lock(&kvm->srcu);
1577 for (i = 0; i < args->count; i++) {
1578 hva = gfn_to_hva(kvm, args->start_gfn + i);
1579 if (kvm_is_error_hva(hva)) {
1584 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1588 srcu_read_unlock(&kvm->srcu, srcu_idx);
1589 up_read(¤t->mm->mmap_sem);
1592 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1593 sizeof(uint8_t) * args->count);
1602 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1606 int srcu_idx, i, r = 0;
1609 if (args->flags != 0)
1612 /* Enforce sane limit on memory allocation */
1613 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1616 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1620 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1621 sizeof(uint8_t) * args->count);
1627 /* Enable storage key handling for the guest */
1628 r = s390_enable_skey();
1633 down_read(¤t->mm->mmap_sem);
1634 srcu_idx = srcu_read_lock(&kvm->srcu);
1635 while (i < args->count) {
1637 hva = gfn_to_hva(kvm, args->start_gfn + i);
1638 if (kvm_is_error_hva(hva)) {
1643 /* Lowest order bit is reserved */
1644 if (keys[i] & 0x01) {
1649 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1651 r = fixup_user_fault(current, current->mm, hva,
1652 FAULT_FLAG_WRITE, &unlocked);
1659 srcu_read_unlock(&kvm->srcu, srcu_idx);
1660 up_read(¤t->mm->mmap_sem);
1667 * Base address and length must be sent at the start of each block, therefore
1668 * it's cheaper to send some clean data, as long as it's less than the size of
1671 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1672 /* for consistency */
1673 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1676  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1677  * address falls in a hole. In that case the index of one of the memslots
1678  * bordering the hole is returned.
/*
 * Fast path: check the cached lru_slot first, then binary-search the
 * memslot array; on a hit, update the lru_slot cache.
 * NOTE(review): the search-loop else branch and the return statements
 * appear elided in this extract.
 */
1680 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1682 	int start = 0, end = slots->used_slots;
1683 	int slot = atomic_read(&slots->lru_slot);
1684 	struct kvm_memory_slot *memslots = slots->memslots;
1686 	if (gfn >= memslots[slot].base_gfn &&
1687 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1690 	while (start < end) {
1691 		slot = start + (end - start) / 2;
1693 		if (gfn >= memslots[slot].base_gfn)
1699 	if (gfn >= memslots[start].base_gfn &&
1700 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1701 		atomic_set(&slots->lru_slot, start);
/*
 * Peek mode for KVM_S390_GET_CMMA_BITS: read up to bufsize consecutive
 * PGSTE CMMA values starting at args->start_gfn without consulting or
 * clearing the dirty bitmap. Stores (pgstev >> 24) & 0x43 per page.
 * Returns -EFAULT only if the very first gfn is invalid; otherwise
 * returns success with however many values were copied.
 */
1707 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1708 			      u8 *res, unsigned long bufsize)
1710 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1713 	while (args->count < bufsize) {
1714 		hva = gfn_to_hva(kvm, cur_gfn);
1716 		 * We return an error if the first value was invalid, but we
1717 		 * return successfully if at least one value was copied.
1719 		if (kvm_is_error_hva(hva))
1720 			return args->count ? 0 : -EFAULT;
1721 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1723 		res[args->count++] = (pgstev >> 24) & 0x43;
/*
 * Find the guest frame number of the next CMMA-dirty page at or after
 * cur_gfn, scanning the per-memslot second dirty bitmap and walking
 * across memslots (wrapping to the highest slot when above all slots).
 * Returns ms->base_gfn + ofs for the slot/offset found.
 * NOTE(review): slot-index decrement lines appear elided in this extract.
 */
1730 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1731 					      unsigned long cur_gfn)
1733 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1734 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1735 	unsigned long ofs = cur_gfn - ms->base_gfn;
1737 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1739 		/* If we are above the highest slot, wrap around */
1741 			slotidx = slots->used_slots - 1;
1743 		ms = slots->memslots + slotidx;
1746 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1747 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1749 		ms = slots->memslots + slotidx;
1750 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1752 	return ms->base_gfn + ofs;
/*
 * Migration mode for KVM_S390_GET_CMMA_BITS: harvest CMMA values for
 * dirty pages, clearing each dirty bit as it is consumed and keeping
 * kvm->arch.cmma_dirty_pages in sync. Stops at the buffer end, at the
 * end of memory, or when the next dirty bit is farther than
 * KVM_S390_MAX_BIT_DISTANCE away (so clean runs are not transmitted).
 * NOTE(review): several break/advance lines appear elided in this extract.
 */
1755 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1756 			     u8 *res, unsigned long bufsize)
1758 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1759 	struct kvm_memslots *slots = kvm_memslots(kvm);
1760 	struct kvm_memory_slot *ms;
1762 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1763 	ms = gfn_to_memslot(kvm, cur_gfn);
1765 	args->start_gfn = cur_gfn;
1768 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1769 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1771 	while (args->count < bufsize) {
1772 		hva = gfn_to_hva(kvm, cur_gfn);
1773 		if (kvm_is_error_hva(hva))
1775 		/* Decrement only if we actually flipped the bit to 0 */
1776 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1777 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
1778 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1780 		/* Save the value */
1781 		res[args->count++] = (pgstev >> 24) & 0x43;
1782 		/* If the next bit is too far away, stop. */
1783 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1785 		/* If we reached the previous "next", find the next one */
1786 		if (cur_gfn == next_gfn)
1787 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1788 		/* Reached the end of memory or of the buffer, stop */
1789 		if ((next_gfn >= mem_end) ||
1790 		    (next_gfn - args->start_gfn >= bufsize))
1793 		/* Reached the end of the current memslot, take the next one. */
1794 		if (cur_gfn - ms->base_gfn >= ms->npages) {
1795 			ms = gfn_to_memslot(kvm, cur_gfn);
1804  * This function searches for the next page with dirty CMMA attributes, and
1805  * saves the attributes in the buffer up to either the end of the buffer or
1806  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1807  * no trailing clean bytes are saved.
1808  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1809  * output buffer will indicate 0 as length.
/*
 * Entry point for KVM_S390_GET_CMMA_BITS: validates flags/migration mode,
 * allocates a temporary buffer, then delegates to peek_cmma (peek) or
 * get_cmma (migration) under mmap_sem + kvm->srcu, and copies the result
 * plus the remaining-dirty count back to user space.
 * NOTE(review): several return/cleanup lines appear elided in this extract.
 */
1811 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1812 				  struct kvm_s390_cmma_log *args)
1814 	unsigned long bufsize;
1815 	int srcu_idx, peek, ret;
1818 	if (!kvm->arch.use_cmma)
1820 	/* Invalid/unsupported flags were specified */
1821 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1823 	/* Migration mode query, and we are not doing a migration */
1824 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1825 	if (!peek && !kvm->arch.migration_mode)
1827 	/* CMMA is disabled or was not used, or the buffer has length zero */
1828 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1829 	if (!bufsize || !kvm->mm->context.uses_cmm) {
1830 		memset(args, 0, sizeof(*args));
1833 	/* We are not peeking, and there are no dirty pages */
1834 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1835 		memset(args, 0, sizeof(*args));
1839 	values = vmalloc(bufsize);
1843 	down_read(&kvm->mm->mmap_sem);
1844 	srcu_idx = srcu_read_lock(&kvm->srcu);
1846 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1848 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1849 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1850 	up_read(&kvm->mm->mmap_sem);
1852 	if (kvm->arch.migration_mode)
1853 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1855 		args->remaining = 0;
1857 	if (copy_to_user((void __user *)args->values, values, args->count))
1865  * This function sets the CMMA attributes for the given pages. If the input
1866  * buffer has zero length, no action is taken, otherwise the attributes are
1867  * set and the mm->context.uses_cmm flag is set.
/*
 * Entry point for KVM_S390_SET_CMMA_BITS: copies up to
 * KVM_S390_CMMA_SIZE_MAX attribute bytes from user space and writes them
 * into the PGSTEs (shifted into bits 24+, masked by usage/NODAT bits)
 * under mmap_sem + kvm->srcu; finally marks the mm as using CMM
 * (under a write-lock on mmap_sem).
 * NOTE(review): return/cleanup lines appear elided in this extract.
 */
1869 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1870 				  const struct kvm_s390_cmma_log *args)
1872 	unsigned long hva, mask, pgstev, i;
1874 	int srcu_idx, r = 0;
1878 	if (!kvm->arch.use_cmma)
1880 	/* invalid/unsupported flags */
1881 	if (args->flags != 0)
1883 	/* Enforce sane limit on memory allocation */
1884 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1887 	if (args->count == 0)
1890 	bits = vmalloc(array_size(sizeof(*bits), args->count));
1894 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1900 	down_read(&kvm->mm->mmap_sem);
1901 	srcu_idx = srcu_read_lock(&kvm->srcu);
1902 	for (i = 0; i < args->count; i++) {
1903 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1904 		if (kvm_is_error_hva(hva)) {
1910 		pgstev = pgstev << 24;
1911 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1912 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1914 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1915 	up_read(&kvm->mm->mmap_sem);
1917 	if (!kvm->mm->context.uses_cmm) {
1918 		down_write(&kvm->mm->mmap_sem);
1919 		kvm->mm->context.uses_cmm = 1;
1920 		up_write(&kvm->mm->mmap_sem);
/*
 * VM-scope ioctl dispatcher for s390: interrupt injection, dummy irqchip
 * routing, device-attr get/set/has, storage-key get/set, and CMMA bit
 * get/set (the latter two serialized via kvm->slots_lock).
 * NOTE(review): the switch header, breaks, default case and return
 * appear elided in this extract.
 */
1927 long kvm_arch_vm_ioctl(struct file *filp,
1928 		       unsigned int ioctl, unsigned long arg)
1930 	struct kvm *kvm = filp->private_data;
1931 	void __user *argp = (void __user *)arg;
1932 	struct kvm_device_attr attr;
1936 	case KVM_S390_INTERRUPT: {
1937 		struct kvm_s390_interrupt s390int;
1940 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1942 		r = kvm_s390_inject_vm(kvm, &s390int);
1945 	case KVM_CREATE_IRQCHIP: {
1946 		struct kvm_irq_routing_entry routing;
1949 		if (kvm->arch.use_irqchip) {
1950 			/* Set up dummy routing. */
1951 			memset(&routing, 0, sizeof(routing));
1952 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1956 	case KVM_SET_DEVICE_ATTR: {
1958 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1960 		r = kvm_s390_vm_set_attr(kvm, &attr);
1963 	case KVM_GET_DEVICE_ATTR: {
1965 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1967 		r = kvm_s390_vm_get_attr(kvm, &attr);
1970 	case KVM_HAS_DEVICE_ATTR: {
1972 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1974 		r = kvm_s390_vm_has_attr(kvm, &attr);
1977 	case KVM_S390_GET_SKEYS: {
1978 		struct kvm_s390_skeys args;
1981 		if (copy_from_user(&args, argp,
1982 				   sizeof(struct kvm_s390_skeys)))
1984 		r = kvm_s390_get_skeys(kvm, &args);
1987 	case KVM_S390_SET_SKEYS: {
1988 		struct kvm_s390_skeys args;
1991 		if (copy_from_user(&args, argp,
1992 				   sizeof(struct kvm_s390_skeys)))
1994 		r = kvm_s390_set_skeys(kvm, &args);
1997 	case KVM_S390_GET_CMMA_BITS: {
1998 		struct kvm_s390_cmma_log args;
2001 		if (copy_from_user(&args, argp, sizeof(args)))
2003 		mutex_lock(&kvm->slots_lock);
2004 		r = kvm_s390_get_cmma_bits(kvm, &args);
2005 		mutex_unlock(&kvm->slots_lock);
2007 		r = copy_to_user(argp, &args, sizeof(args));
2013 	case KVM_S390_SET_CMMA_BITS: {
2014 		struct kvm_s390_cmma_log args;
2017 		if (copy_from_user(&args, argp, sizeof(args)))
2019 		mutex_lock(&kvm->slots_lock);
2020 		r = kvm_s390_set_cmma_bits(kvm, &args);
2021 		mutex_unlock(&kvm->slots_lock);
/*
 * Query whether the AP extended addressing (APXA) facility is installed,
 * via the AP query-configuration-info instruction.
 * NOTE(review): the return statements appear elided in this extract.
 */
2031 static int kvm_s390_apxa_installed(void)
2033 	struct ap_config_info info;
2035 	if (ap_instructions_available()) {
2036 		if (ap_qci(&info) == 0)
2044  * The format of the crypto control block (CRYCB) is specified in the 3 low
2045  * order bits of the CRYCB designation (CRYCBD) field as follows:
2046  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2047  * AP extended addressing (APXA) facility are installed.
2048  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2049  * Format 2: Both the APXA and MSAX3 facilities are installed
/* Derive the CRYCBD from the CRYCB address and the facilities above. */
2051 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2053 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2055 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2056 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2058 	/* Check whether MSAX3 is installed */
2059 	if (!test_kvm_facility(kvm, 76))
2062 	if (kvm_s390_apxa_installed())
2063 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2065 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
/*
 * Install the AP matrix masks (adapters apm, queues aqm, domains adm)
 * into the guest's CRYCB. All vcpus are blocked for the update and a
 * VSIE-restart request is broadcast so each vcpu rebuilds its shadow
 * CRYCB. Format 2 uses the 256-bit APCB1; formats 0/1 use APCB0.
 */
2068 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2069 			       unsigned long *aqm, unsigned long *adm)
2071 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2073 	mutex_lock(&kvm->lock);
2074 	kvm_s390_vcpu_block_all(kvm);
2076 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2077 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2078 		memcpy(crycb->apcb1.apm, apm, 32);
2079 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2080 			 apm[0], apm[1], apm[2], apm[3]);
2081 		memcpy(crycb->apcb1.aqm, aqm, 32);
2082 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2083 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2084 		memcpy(crycb->apcb1.adm, adm, 32);
2085 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2086 			 adm[0], adm[1], adm[2], adm[3]);
2089 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2090 		memcpy(crycb->apcb0.apm, apm, 8);
2091 		memcpy(crycb->apcb0.aqm, aqm, 2);
2092 		memcpy(crycb->apcb0.adm, adm, 2);
2093 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2094 			 apm[0], *((unsigned short *)aqm),
2095 			 *((unsigned short *)adm));
2097 	default:	/* Can not happen */
2101 	/* recreate the shadow crycb for each vcpu */
2102 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2103 	kvm_s390_vcpu_unblock_all(kvm);
2104 	mutex_unlock(&kvm->lock);
2106 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
/*
 * Clear both APCB0 and APCB1 of the guest CRYCB (revoking all AP
 * adapters/queues/domains) with all vcpus blocked, then broadcast a
 * VSIE-restart so shadow CRYCBs are rebuilt.
 */
2108 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2110 	mutex_lock(&kvm->lock);
2111 	kvm_s390_vcpu_block_all(kvm);
2113 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2114 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2115 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2116 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2118 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2119 	/* recreate the shadow crycb for each vcpu */
2120 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2121 	kvm_s390_vcpu_unblock_all(kvm);
2122 	mutex_unlock(&kvm->lock);
2124 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
/*
 * Build the initial guest cpuid: host cpuid with the version byte forced
 * to 0xff, returned as a raw u64.
 * NOTE(review): the get_cpu_id() call appears elided in this extract.
 */
2126 static u64 kvm_s390_get_initial_cpuid(void)
2131 	cpuid.version = 0xff;
2132 	return *((u64 *) &cpuid);
/*
 * Per-VM crypto setup: point the CRYCB at sie_page2, set the CRYCB
 * format, and — when MSAX3 (facility 76) is available — enable AES/DEA
 * protected-key wrapping with freshly generated random wrapping keys.
 */
2135 static void kvm_s390_crypto_init(struct kvm *kvm)
2137 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2138 	kvm_s390_set_crycb_format(kvm);
2140 	if (!test_kvm_facility(kvm, 76))
2143 	/* Enable AES/DEA protected key functions by default */
2144 	kvm->arch.crypto.aes_kw = 1;
2145 	kvm->arch.crypto.dea_kw = 1;
2146 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2147 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2148 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2149 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
/*
 * Free the system control area: extended SCA via free_pages_exact,
 * basic SCA via free_page; then NULL the pointer.
 */
2152 static void sca_dispose(struct kvm *kvm)
2154 	if (kvm->arch.use_esca)
2155 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2157 		free_page((unsigned long)(kvm->arch.sca));
2158 	kvm->arch.sca = NULL;
/*
 * Create a new s390 VM: enable SIE, allocate the basic SCA (staggered
 * within the page via the static sca_offset, under kvm_lock), register
 * a per-VM debug feature, allocate sie_page2 (facility lists + CRYCB),
 * seed the facility mask/list from the host STFLE data, force czam (138)
 * and STHYI (74) facilities, init crypto, floating-interrupt state, and
 * the gmap (unless this is a ucontrol VM). Error paths unwind via the
 * free/debug_unregister labels at the end.
 * NOTE(review): many error checks, gotos and the return appear elided
 * in this extract.
 */
2161 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2163 	gfp_t alloc_flags = GFP_KERNEL;
2165 	char debug_name[16];
2166 	static unsigned long sca_offset;
2169 #ifdef CONFIG_KVM_S390_UCONTROL
2170 	if (type & ~KVM_VM_S390_UCONTROL)
2172 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2179 	rc = s390_enable_sie();
2185 	if (!sclp.has_64bscao)
2186 		alloc_flags |= GFP_DMA;
2187 	rwlock_init(&kvm->arch.sca_lock);
2188 	/* start with basic SCA */
2189 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2192 	spin_lock(&kvm_lock);
2194 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2196 	kvm->arch.sca = (struct bsca_block *)
2197 			((char *) kvm->arch.sca + sca_offset);
2198 	spin_unlock(&kvm_lock);
2200 	sprintf(debug_name, "kvm-%u", current->pid);
2202 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2206 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2207 	kvm->arch.sie_page2 =
2208 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2209 	if (!kvm->arch.sie_page2)
2212 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2214 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2215 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2216 					      (kvm_s390_fac_base[i] |
2217 					       kvm_s390_fac_ext[i]);
2218 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2219 					      kvm_s390_fac_base[i];
2222 	/* we are always in czam mode - even on pre z14 machines */
2223 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2224 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2225 	/* we emulate STHYI in kvm */
2226 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2227 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2228 	if (MACHINE_HAS_TLB_GUEST) {
2229 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2230 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2233 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2234 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2236 	kvm_s390_crypto_init(kvm);
2238 	mutex_init(&kvm->arch.float_int.ais_lock);
2239 	spin_lock_init(&kvm->arch.float_int.lock);
2240 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2241 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2242 	init_waitqueue_head(&kvm->arch.ipte_wq);
2243 	mutex_init(&kvm->arch.ipte_mutex);
2245 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2246 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2248 	if (type & KVM_VM_S390_UCONTROL) {
2249 		kvm->arch.gmap = NULL;
2250 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2252 		if (sclp.hamax == U64_MAX)
2253 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2255 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2257 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2258 		if (!kvm->arch.gmap)
2260 		kvm->arch.gmap->private = kvm;
2261 		kvm->arch.gmap->pfault_enabled = 0;
2264 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2265 	kvm->arch.use_skf = sclp.has_skey;
2266 	spin_lock_init(&kvm->arch.start_stop_lock);
2267 	kvm_s390_vsie_init(kvm);
2268 	kvm_s390_gisa_init(kvm);
2269 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2273 	free_page((unsigned long)kvm->arch.sie_page2);
2274 	debug_unregister(kvm->arch.dbf);
2276 	KVM_EVENT(3, "creation of vm failed: %d", rc);
/* Per-vcpu debugfs hooks; bodies elided in this extract. */
2280 bool kvm_arch_has_vcpu_debugfs(void)
2285 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
/*
 * Tear down one vcpu: clear pending local irqs and async-pf queue, drop
 * the SCA entry (non-ucontrol) or the per-vcpu gmap (ucontrol), free the
 * CMMA state and the SIE control block, then the vcpu itself.
 */
2290 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2292 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2293 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2294 	kvm_s390_clear_local_irqs(vcpu);
2295 	kvm_clear_async_pf_completion_queue(vcpu);
2296 	if (!kvm_is_ucontrol(vcpu->kvm))
2299 	if (kvm_is_ucontrol(vcpu->kvm))
2300 		gmap_remove(vcpu->arch.gmap);
2302 	if (vcpu->kvm->arch.use_cmma)
2303 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2304 	free_page((unsigned long)(vcpu->arch.sie_block));
2306 	kvm_vcpu_uninit(vcpu);
2307 	kmem_cache_free(kvm_vcpu_cache, vcpu);
/*
 * Destroy every vcpu of the VM, then NULL the vcpu array slots and reset
 * online_vcpus to 0 under kvm->lock.
 */
2310 static void kvm_free_vcpus(struct kvm *kvm)
2313 	struct kvm_vcpu *vcpu;
2315 	kvm_for_each_vcpu(i, vcpu, kvm)
2316 		kvm_arch_vcpu_destroy(vcpu);
2318 	mutex_lock(&kvm->lock);
2319 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2320 		kvm->vcpus[i] = NULL;
2322 	atomic_set(&kvm->online_vcpus, 0);
2323 	mutex_unlock(&kvm->lock);
/*
 * VM teardown: free vcpus, unregister debug feature, destroy GISA,
 * free sie_page2, remove the gmap (non-ucontrol), and release adapters,
 * floating irqs and VSIE state.
 */
2326 void kvm_arch_destroy_vm(struct kvm *kvm)
2328 	kvm_free_vcpus(kvm);
2330 	debug_unregister(kvm->arch.dbf);
2331 	kvm_s390_gisa_destroy(kvm);
2332 	free_page((unsigned long)kvm->arch.sie_page2);
2333 	if (!kvm_is_ucontrol(kvm))
2334 		gmap_remove(kvm->arch.gmap);
2335 	kvm_s390_destroy_adapters(kvm);
2336 	kvm_s390_clear_float_irqs(kvm);
2337 	kvm_s390_vsie_destroy(kvm);
2338 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2341 /* Section: vcpu related */
/*
 * ucontrol VMs: give each vcpu its own full-range gmap.
 * NOTE(review): the error/success returns appear elided in this extract.
 */
2342 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2344 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2345 	if (!vcpu->arch.gmap)
2347 	vcpu->arch.gmap->private = vcpu->kvm;
/*
 * Remove a vcpu from the SCA (extended or basic): clear its bit in the
 * mcn mask and zero its SIE-block descriptor address, under the SCA
 * read-lock.
 */
2352 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2354 	if (!kvm_s390_use_sca_entries())
2356 	read_lock(&vcpu->kvm->arch.sca_lock);
2357 	if (vcpu->kvm->arch.use_esca) {
2358 		struct esca_block *sca = vcpu->kvm->arch.sca;
2360 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2361 		sca->cpu[vcpu->vcpu_id].sda = 0;
2363 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2365 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2366 		sca->cpu[vcpu->vcpu_id].sda = 0;
2368 	read_unlock(&vcpu->kvm->arch.sca_lock);
/*
 * Insert a vcpu into the SCA: record the SIE block address in the
 * cpu entry, point the vcpu's SIE block at the SCA (scaoh/scaol), and
 * set its mcn bit. Without SCA entries only the basic-SCA pointer is
 * set up (still needed for ipte control).
 */
2371 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2373 	if (!kvm_s390_use_sca_entries()) {
2374 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2376 		/* we still need the basic sca for the ipte control */
2377 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2378 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2381 	read_lock(&vcpu->kvm->arch.sca_lock);
2382 	if (vcpu->kvm->arch.use_esca) {
2383 		struct esca_block *sca = vcpu->kvm->arch.sca;
2385 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2386 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2387 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2388 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2389 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2391 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2393 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2394 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2395 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2396 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2398 	read_unlock(&vcpu->kvm->arch.sca_lock);
2401 /* Basic SCA to Extended SCA data copy routines */
/* Copy one SCA entry's SIGP control fields from basic to extended form. */
2402 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2405 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2406 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
/* Copy the ipte control and all basic-SCA cpu slots into the extended SCA. */
2409 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2413 	d->ipte_control = s->ipte_control;
2415 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2416 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
/*
 * Replace the basic SCA with a freshly allocated extended SCA: copy
 * over the data, repoint every vcpu's SIE block (scaoh/scaol + ECB2_ESCA)
 * with all vcpus blocked and the SCA write-lock held, then free the old
 * basic SCA.
 * NOTE(review): allocation-failure/return lines appear elided here.
 */
2419 static int sca_switch_to_extended(struct kvm *kvm)
2421 	struct bsca_block *old_sca = kvm->arch.sca;
2422 	struct esca_block *new_sca;
2423 	struct kvm_vcpu *vcpu;
2424 	unsigned int vcpu_idx;
2427 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2431 	scaoh = (u32)((u64)(new_sca) >> 32);
2432 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2434 	kvm_s390_vcpu_block_all(kvm);
2435 	write_lock(&kvm->arch.sca_lock);
2437 	sca_copy_b_to_e(new_sca, old_sca);
2439 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2440 		vcpu->arch.sie_block->scaoh = scaoh;
2441 		vcpu->arch.sie_block->scaol = scaol;
2442 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2444 	kvm->arch.sca = new_sca;
2445 	kvm->arch.use_esca = 1;
2447 	write_unlock(&kvm->arch.sca_lock);
2448 	kvm_s390_vcpu_unblock_all(kvm);
2450 	free_page((unsigned long)old_sca);
2452 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2453 		 old_sca, kvm->arch.sca);
/*
 * Can a vcpu with this id be added? Without SCA entries the limit is
 * KVM_MAX_VCPUS; with a basic SCA, ids below KVM_S390_BSCA_CPU_SLOTS fit
 * directly, otherwise an upgrade to the extended SCA is attempted
 * (requires sclp.has_esca and has_64bscao) under kvm->lock.
 * NOTE(review): early-return lines appear elided in this extract.
 */
2457 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2461 	if (!kvm_s390_use_sca_entries()) {
2462 		if (id < KVM_MAX_VCPUS)
2466 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2468 	if (!sclp.has_esca || !sclp.has_64bscao)
2471 	mutex_lock(&kvm->lock);
2472 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2473 	mutex_unlock(&kvm->lock);
2475 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
/*
 * Initialize vcpu state: invalid pfault token, empty async-pf queue,
 * zero prefix, and the set of sync-reg classes advertised to user space
 * (RICCB/BPBC/GSCB/ETOKEN gated on the respective facilities, VRS vs
 * FPRS depending on vector support). ucontrol VMs get a private gmap.
 */
2478 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2480 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2481 	kvm_clear_async_pf_completion_queue(vcpu);
2482 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2488 	kvm_s390_set_prefix(vcpu, 0);
2489 	if (test_kvm_facility(vcpu->kvm, 64))
2490 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2491 	if (test_kvm_facility(vcpu->kvm, 82))
2492 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2493 	if (test_kvm_facility(vcpu->kvm, 133))
2494 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2495 	if (test_kvm_facility(vcpu->kvm, 156))
2496 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2497 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2498 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2501 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2503 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2505 	if (kvm_is_ucontrol(vcpu->kvm))
2506 		return __kvm_ucontrol_vcpu_init(vcpu);
2511 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/* Start CPU-timer accounting: record the TOD start stamp under seqcount. */
2512 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2514 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2515 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2516 	vcpu->arch.cputm_start = get_tod_clock_fast();
2517 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2520 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/* Stop accounting: charge the elapsed TOD delta against the cpu timer. */
2521 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2523 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2524 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2525 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2526 	vcpu->arch.cputm_start = 0;
2527 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2530 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2531 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2533 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2534 	vcpu->arch.cputm_enabled = true;
2535 	__start_cpu_timer_accounting(vcpu);
2538 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2539 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2541 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2542 	__stop_cpu_timer_accounting(vcpu);
2543 	vcpu->arch.cputm_enabled = false;
/* Preemption-safe wrappers around the __enable/__disable helpers. */
2546 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2548 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2549 	__enable_cpu_timer_accounting(vcpu);
2553 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2555 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2556 	__disable_cpu_timer_accounting(vcpu);
2560 /* set the cpu timer - may only be called from the VCPU thread itself */
/*
 * Write the SIE cpu timer under the cputm seqcount; if accounting is
 * active, restart the TOD start stamp so elapsed time is measured from
 * the new value.
 */
2561 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2563 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2564 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2565 	if (vcpu->arch.cputm_enabled)
2566 		vcpu->arch.cputm_start = get_tod_clock_fast();
2567 	vcpu->arch.sie_block->cputm = cputm;
2568 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2572 /* update and get the cpu timer - can also be called from other VCPU threads */
2573 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
/* Fast path: no accounting in progress, the SIE block value is authoritative. */
2578 if (unlikely(!vcpu->arch.cputm_enabled))
2579 return vcpu->arch.sie_block->cputm;
2581 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
/* Seqcount read loop: retry if a writer updated cputm/cputm_start concurrently. */
2583 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2585 * If the writer would ever execute a read in the critical
2586 * section, e.g. in irq context, we have a deadlock.
2588 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2589 value = vcpu->arch.sie_block->cputm;
2590 /* if cputm_start is 0, accounting is being started/stopped */
2591 if (likely(vcpu->arch.cputm_start))
2592 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
/* seq & ~1: tolerate an odd (in-progress) start sequence from the vcpu thread itself */
2593 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
/* Called when the vcpu is scheduled in on a host CPU: re-enable its gmap,
 * mark it RUNNING, and resume CPU timer accounting unless it is idle. */
2598 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2601 gmap_enable(vcpu->arch.enabled_gmap);
2602 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2603 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2604 __start_cpu_timer_accounting(vcpu);
/* Called when the vcpu is scheduled out: mirror of kvm_arch_vcpu_load().
 * Remember the currently enabled gmap so the next load restores the right one. */
2608 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2611 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2612 __stop_cpu_timer_accounting(vcpu);
2613 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2614 vcpu->arch.enabled_gmap = gmap_get_enabled();
2615 gmap_disable(vcpu->arch.enabled_gmap);
/* Perform the architected initial CPU reset on this vcpu: clear PSW, prefix,
 * timers, registers and pending async-pf/irq state, and re-arm the reset
 * values of the control registers. */
2619 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2621 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2622 vcpu->arch.sie_block->gpsw.mask = 0UL;
2623 vcpu->arch.sie_block->gpsw.addr = 0UL;
2624 kvm_s390_set_prefix(vcpu, 0);
2625 kvm_s390_set_cpu_timer(vcpu, 0);
2626 vcpu->arch.sie_block->ckc = 0UL;
2627 vcpu->arch.sie_block->todpr = 0;
/* Reset all 16 control registers, then set the architected reset bits in CR0/CR14. */
2628 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2629 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
2630 CR0_INTERRUPT_KEY_SUBMASK |
2631 CR0_MEASUREMENT_ALERT_SUBMASK;
2632 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2634 CR14_EXTERNAL_DAMAGE_SUBMASK;
2635 /* make sure the new fpc will be lazily loaded */
2637 current->thread.fpu.fpc = 0;
2638 vcpu->arch.sie_block->gbea = 1;
2639 vcpu->arch.sie_block->pp = 0;
/* Clear the branch-prediction-blocking control bit. */
2640 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
/* Invalidate any outstanding pfault token and drop queued async page faults. */
2641 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2642 kvm_clear_async_pf_completion_queue(vcpu);
/* Unless userspace manages cpu state itself, a reset leaves the vcpu stopped. */
2643 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2644 kvm_s390_vcpu_stop(vcpu);
2645 kvm_s390_clear_local_irqs(vcpu);
/* Late vcpu setup after creation: inherit the VM-wide TOD epoch, hook up the
 * VM gmap (non-ucontrol only) and arm operation-exception interception when
 * needed (facility 74 / userspace instr0 handling). */
2648 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
/* kvm->lock serializes against concurrent TOD epoch updates. */
2650 mutex_lock(&vcpu->kvm->lock);
2652 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2653 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2655 mutex_unlock(&vcpu->kvm->lock);
2656 if (!kvm_is_ucontrol(vcpu->kvm)) {
2657 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2660 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2661 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2662 /* make vcpu_load load the right gmap on the first trigger */
2663 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
/* Program the vcpu's SIE crypto controls from the VM-wide crypto config:
 * crycb origin, AP interpretation (ECA_APIE) and AES/DEA protected-key
 * wrapping (ECB3_AES/ECB3_DEA). */
2666 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2669 * If the AP instructions are not being interpreted and the MSAX3
2670 * facility is not configured for the guest, there is nothing to set up.
2672 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2675 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
/* Start from a clean slate, then re-enable only what the VM config asks for. */
2676 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2677 vcpu->arch.sie_block->eca &= ~ECA_APIE;
2679 if (vcpu->kvm->arch.crypto.apie)
2680 vcpu->arch.sie_block->eca |= ECA_APIE;
2682 /* Set up protected key support */
2683 if (vcpu->kvm->arch.crypto.aes_kw)
2684 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2685 if (vcpu->kvm->arch.crypto.dea_kw)
2686 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
/* Free the CMMA collaborative-memory bitmap page and clear its SIE pointer. */
2689 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2691 free_page(vcpu->arch.sie_block->cbrlo);
2692 vcpu->arch.sie_block->cbrlo = 0;
/* Allocate the zeroed CBRL origin page used for CMMA interpretation.
 * Returns 0 on success; error path on allocation failure is elided here
 * (presumably -ENOMEM — confirm against the full source). */
2695 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2697 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2698 if (!vcpu->arch.sie_block->cbrlo)
/* Copy the VM-wide CPU model (IBC value and, with facility 7, the facility
 * list pointer) into this vcpu's SIE block. */
2703 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2705 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2707 vcpu->arch.sie_block->ibc = model->ibc;
2708 if (test_kvm_facility(vcpu->kvm, 7))
2709 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
/* Main vcpu SIE-block configuration: cpuflags, execution controls (ecb/ecb2/
 * ecb3/eca/ecd) derived from the available facilities and sclp capabilities,
 * GISA interrupt delivery, CMMA, the ckc wakeup timer and crypto setup. */
2712 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2716 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
/* Guest-external-detection facility level depends on facilities 78/8. */
2720 if (test_kvm_facility(vcpu->kvm, 78))
2721 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2722 else if (test_kvm_facility(vcpu->kvm, 8))
2723 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2725 kvm_s390_vcpu_setup_model(vcpu);
2727 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2728 if (MACHINE_HAS_ESOP)
2729 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2730 if (test_kvm_facility(vcpu->kvm, 9))
2731 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2732 if (test_kvm_facility(vcpu->kvm, 73))
2733 vcpu->arch.sie_block->ecb |= ECB_TE;
2735 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2736 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2737 if (test_kvm_facility(vcpu->kvm, 130))
2738 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2739 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
/* NOTE(review): the guard conditions for ECA_CEI/ECA_IB/ECA_SII are on
 * lines elided from this view (presumably sclp capability checks). */
2741 vcpu->arch.sie_block->eca |= ECA_CEI;
2743 vcpu->arch.sie_block->eca |= ECA_IB;
2745 vcpu->arch.sie_block->eca |= ECA_SII;
2746 if (sclp.has_sigpif)
2747 vcpu->arch.sie_block->eca |= ECA_SIGPI;
/* Facility 129: vector (VX) support, host register management in ECD. */
2748 if (test_kvm_facility(vcpu->kvm, 129)) {
2749 vcpu->arch.sie_block->eca |= ECA_VX;
2750 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2752 if (test_kvm_facility(vcpu->kvm, 139))
2753 vcpu->arch.sie_block->ecd |= ECD_MEF;
2754 if (test_kvm_facility(vcpu->kvm, 156))
2755 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
/* gd is only non-zero when a GISA exists; enable adapter-interrupt virt. */
2756 if (vcpu->arch.sie_block->gd) {
2757 vcpu->arch.sie_block->eca |= ECA_AIV;
2758 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2759 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2761 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2763 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
/* Keyless subset mode vs. intercepting the storage-key instructions;
 * the selecting condition is on a line elided from this view. */
2766 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2768 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2770 if (vcpu->kvm->arch.use_cmma) {
2771 rc = kvm_s390_vcpu_setup_cmma(vcpu);
/* ckc_timer fires kvm_s390_idle_wakeup when the clock comparator expires. */
2775 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2776 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2778 vcpu->arch.sie_block->hpid = HPID_KVM;
2780 kvm_s390_vcpu_crypto_setup(vcpu);
/* Allocate and minimally initialize a new vcpu: the kvm_vcpu structure, its
 * sie_page (SIE control block + itdb), GISA origin, and common kvm_vcpu_init.
 * Error paths free what was allocated (labels elided from this view). */
2785 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2788 struct kvm_vcpu *vcpu;
2789 struct sie_page *sie_page;
/* A non-ucontrol VM needs a free SCA slot for this vcpu id. */
2792 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2797 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
/* The SIE block must live in a single page; enforce at compile time. */
2801 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2802 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2806 vcpu->arch.sie_block = &sie_page->sie_block;
2807 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2809 /* the real guest size will always be smaller than msl */
2810 vcpu->arch.sie_block->mso = 0;
2811 vcpu->arch.sie_block->msl = sclp.hamax;
2813 vcpu->arch.sie_block->icpua = id;
2814 spin_lock_init(&vcpu->arch.local_int.lock);
/* Point the SIE block at the GISA, tagging format-1 when the facility exists. */
2815 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2816 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2817 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2818 seqcount_init(&vcpu->arch.cputm_seqcount);
2820 rc = kvm_vcpu_init(vcpu, kvm, id);
2822 goto out_free_sie_block;
2823 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2824 vcpu->arch.sie_block);
2825 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2829 free_page((unsigned long)(vcpu->arch.sie_block));
2831 kmem_cache_free(kvm_vcpu_cache, vcpu);
/* A vcpu is runnable when it has a deliverable interrupt pending. */
2836 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2838 return kvm_s390_vcpu_has_irq(vcpu, 0);
/* Guest is in "kernel mode" when its PSW problem-state bit is clear. */
2841 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2843 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
/* Block SIE entry for this vcpu (paired with kvm_s390_vcpu_unblock). */
2846 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2848 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Allow SIE entry again after kvm_s390_vcpu_block(). */
2852 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2854 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Flag a pending request in prog20 so SIE entry is inhibited until
 * kvm_s390_vcpu_request_handled() clears it. */
2857 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2859 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
/* True while SIE entry is inhibited, either blocked or with a request pending. */
2863 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
2865 return atomic_read(&vcpu->arch.sie_block->prog20) &
2866 (PROG_BLOCK_SIE | PROG_REQUEST);
/* Clear the pending-request inhibit set by kvm_s390_vcpu_request(). */
2869 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2871 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2875 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
2876 * If the CPU is not running (e.g. waiting as idle) the function will
2877 * return immediately. */
2878 void exit_sie(struct kvm_vcpu *vcpu)
/* STOP_INT forces an intercept; also kick a nested (vsie) guest if any. */
2880 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2881 kvm_s390_vsie_kick(vcpu);
/* Busy-wait until the SIE instruction has actually been left. */
2882 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2886 /* Kick a guest cpu out of SIE to process a request synchronously */
2887 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
/* Queue the generic KVM request, then inhibit/leave SIE so it is seen promptly. */
2889 kvm_make_request(req, vcpu);
2890 kvm_s390_vcpu_request(vcpu);
/* gmap invalidation callback: when a range overlapping a vcpu's (two-page)
 * prefix area is unmapped, request an MMU reload so the prefix gets
 * re-protected/re-mapped before the next SIE entry. */
2893 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2896 struct kvm *kvm = gmap->private;
2897 struct kvm_vcpu *vcpu;
2898 unsigned long prefix;
/* Shadow gmaps (vsie) are handled elsewhere. */
2901 if (gmap_is_shadow(gmap))
/* Prefix addresses are below 2 GB by definition; ignore higher ranges. */
2903 if (start >= 1UL << 31)
2904 /* We are only interested in prefix pages */
2906 kvm_for_each_vcpu(i, vcpu, kvm) {
2907 /* match against both prefix pages */
2908 prefix = kvm_s390_get_prefix(vcpu);
2909 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2910 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2912 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
/* Required by common KVM code; s390 never uses IPI-based kicks. */
2917 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2919 /* kvm common code refers to this, but never calls it */
/* KVM_GET_ONE_REG backend: copy the requested s390 register (TODPR, epoch,
 * cpu timer, clock comparator, pfault triple, PP, GBEA) to userspace.
 * Each put_user() result is stored in r and returned to the caller. */
2924 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2925 struct kvm_one_reg *reg)
2930 case KVM_REG_S390_TODPR:
2931 r = put_user(vcpu->arch.sie_block->todpr,
2932 (u32 __user *)reg->addr);
2934 case KVM_REG_S390_EPOCHDIFF:
2935 r = put_user(vcpu->arch.sie_block->epoch,
2936 (u64 __user *)reg->addr);
/* CPU timer must be read via the accessor so running accounting is folded in. */
2938 case KVM_REG_S390_CPU_TIMER:
2939 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2940 (u64 __user *)reg->addr);
2942 case KVM_REG_S390_CLOCK_COMP:
2943 r = put_user(vcpu->arch.sie_block->ckc,
2944 (u64 __user *)reg->addr);
2946 case KVM_REG_S390_PFTOKEN:
2947 r = put_user(vcpu->arch.pfault_token,
2948 (u64 __user *)reg->addr);
2950 case KVM_REG_S390_PFCOMPARE:
2951 r = put_user(vcpu->arch.pfault_compare,
2952 (u64 __user *)reg->addr);
2954 case KVM_REG_S390_PFSELECT:
2955 r = put_user(vcpu->arch.pfault_select,
2956 (u64 __user *)reg->addr);
2958 case KVM_REG_S390_PP:
2959 r = put_user(vcpu->arch.sie_block->pp,
2960 (u64 __user *)reg->addr);
2962 case KVM_REG_S390_GBEA:
2963 r = put_user(vcpu->arch.sie_block->gbea,
2964 (u64 __user *)reg->addr);
/* KVM_SET_ONE_REG backend: mirror of get_one_reg — copy the register value
 * from userspace into the vcpu state, using the proper setters for the cpu
 * timer and the pfault token side effects. */
2973 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2974 struct kvm_one_reg *reg)
2980 case KVM_REG_S390_TODPR:
2981 r = get_user(vcpu->arch.sie_block->todpr,
2982 (u32 __user *)reg->addr);
2984 case KVM_REG_S390_EPOCHDIFF:
2985 r = get_user(vcpu->arch.sie_block->epoch,
2986 (u64 __user *)reg->addr);
/* Read into a local first; the setter keeps seqcount/accounting consistent. */
2988 case KVM_REG_S390_CPU_TIMER:
2989 r = get_user(val, (u64 __user *)reg->addr);
2991 kvm_s390_set_cpu_timer(vcpu, val);
2993 case KVM_REG_S390_CLOCK_COMP:
2994 r = get_user(vcpu->arch.sie_block->ckc,
2995 (u64 __user *)reg->addr);
2997 case KVM_REG_S390_PFTOKEN:
2998 r = get_user(vcpu->arch.pfault_token,
2999 (u64 __user *)reg->addr);
/* Setting the invalid token disables pfault; drop any queued completions. */
3000 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3001 kvm_clear_async_pf_completion_queue(vcpu);
3003 case KVM_REG_S390_PFCOMPARE:
3004 r = get_user(vcpu->arch.pfault_compare,
3005 (u64 __user *)reg->addr);
3007 case KVM_REG_S390_PFSELECT:
3008 r = get_user(vcpu->arch.pfault_select,
3009 (u64 __user *)reg->addr);
3011 case KVM_REG_S390_PP:
3012 r = get_user(vcpu->arch.sie_block->pp,
3013 (u64 __user *)reg->addr);
3015 case KVM_REG_S390_GBEA:
3016 r = get_user(vcpu->arch.sie_block->gbea,
3017 (u64 __user *)reg->addr);
/* ioctl wrapper around the architected initial CPU reset. */
3026 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3028 kvm_s390_vcpu_initial_reset(vcpu);
/* KVM_SET_REGS: copy the 16 general purpose registers into the run struct. */
3032 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3035 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
/* KVM_GET_REGS: copy the 16 general purpose registers out of the run struct. */
3040 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3043 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/* KVM_SET_SREGS: set access registers (run struct) and control registers
 * (SIE block) from userspace. */
3048 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3049 struct kvm_sregs *sregs)
3053 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3054 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/* KVM_GET_SREGS: mirror of set_sregs — read acrs and control registers. */
3060 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3061 struct kvm_sregs *sregs)
3065 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3066 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/* KVM_SET_FPU: validate and install the guest FPC and floating point
 * registers; on vector machines the fprs are stored into the vrs array. */
3072 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
/* Reject reserved bits in the floating point control word. */
3078 if (test_fp_ctl(fpu->fpc)) {
3082 vcpu->run->s.regs.fpc = fpu->fpc;
/* The MACHINE_HAS_VX guard for this branch is on a line elided from this view. */
3084 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3085 (freg_t *) fpu->fprs);
3087 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/* KVM_GET_FPU: mirror of set_fpu — read the guest FPC/fprs, converting
 * from the vector register layout on vector machines. */
3094 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3098 /* make sure we have the latest values */
3101 convert_vx_to_fp((freg_t *) fpu->fprs,
3102 (__vector128 *) vcpu->run->s.regs.vrs);
3104 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3105 fpu->fpc = vcpu->run->s.regs.fpc;
/* Install an initial PSW; only allowed while the vcpu is stopped. */
3111 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3115 if (!is_vcpu_stopped(vcpu))
3118 vcpu->run->psw_mask = psw.mask;
3119 vcpu->run->psw_addr = psw.addr;
/* KVM_TRANSLATE is not supported on s390. */
3124 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3125 struct kvm_translation *tr)
3127 return -EINVAL; /* not implemented yet */
/* Debug controls userspace may request via KVM_SET_GUEST_DEBUG. */
3130 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3131 KVM_GUESTDBG_USE_HW_BP | \
3132 KVM_GUESTDBG_ENABLE)
/* Enable/disable guest debugging: validates the control flags, requires the
 * guest-PER sclp capability, arms CPUSTAT_P and imports hardware breakpoints;
 * on failure or disable, all debug state is torn down again. */
3134 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3135 struct kvm_guest_debug *dbg)
/* Start from a clean slate before applying the new settings. */
3141 vcpu->guest_debug = 0;
3142 kvm_s390_clear_bp_data(vcpu);
3144 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3148 if (!sclp.has_gpere) {
3153 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3154 vcpu->guest_debug = dbg->control;
3155 /* enforce guest PER */
3156 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3158 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3159 rc = kvm_s390_import_bp_data(vcpu, dbg);
/* Disable path: drop PER enforcement and forget the last breakpoint hit. */
3161 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3162 vcpu->arch.guestdbg.last_bp = 0;
/* Error path: undo everything set above. */
3166 vcpu->guest_debug = 0;
3167 kvm_s390_clear_bp_data(vcpu);
3168 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
/* KVM_GET_MP_STATE: report STOPPED vs OPERATING only. */
3176 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3177 struct kvm_mp_state *mp_state)
3183 /* CHECK_STOP and LOAD are not supported yet */
3184 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3185 KVM_MP_STATE_OPERATING;
/* KVM_SET_MP_STATE: move the vcpu to STOPPED or OPERATING. Using this ioctl
 * at all switches the VM to userspace-controlled cpu state. */
3191 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3192 struct kvm_mp_state *mp_state)
3198 /* user space knows about this interface - let it control the state */
3199 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3201 switch (mp_state->mp_state) {
3202 case KVM_MP_STATE_STOPPED:
3203 kvm_s390_vcpu_stop(vcpu);
3205 case KVM_MP_STATE_OPERATING:
3206 kvm_s390_vcpu_start(vcpu);
3208 case KVM_MP_STATE_LOAD:
3209 case KVM_MP_STATE_CHECK_STOP:
3210 /* fall through - CHECK_STOP and LOAD are not supported yet */
/* True while interruption-blocking-state (IBS) is set for this vcpu. */
3219 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3221 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
/* Process all pending KVM requests for this vcpu before (re-)entering SIE:
 * MMU reload (prefix re-protection), TLB flush, IBS enable/disable,
 * operation-exception interception, and CMMA migration start/stop. */
3224 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
/* Allow SIE entry again; a racing request will re-inhibit and retry. */
3227 kvm_s390_vcpu_request_handled(vcpu);
3228 if (!kvm_request_pending(vcpu))
3231 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3232 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3233 * This ensures that the ipte instruction for this request has
3234 * already finished. We might race against a second unmapper that
3235 * wants to set the blocking bit. Lets just retry the request loop.
3237 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3239 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3240 kvm_s390_get_prefix(vcpu),
3241 PAGE_SIZE * 2, PROT_WRITE);
/* Protection failed (e.g. page not mapped yet) — re-queue and retry later. */
3243 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
/* ihcpu = 0xffff invalidates the SIE-cached TLB for this vcpu. */
3249 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3250 vcpu->arch.sie_block->ihcpu = 0xffff;
3254 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3255 if (!ibs_enabled(vcpu)) {
3256 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3257 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3262 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3263 if (ibs_enabled(vcpu)) {
3264 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3265 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3270 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3271 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3275 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3277 * Disable CMM virtualization; we will emulate the ESSA
3278 * instruction manually, in order to provide additional
3279 * functionalities needed for live migration.
3281 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3285 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3287 * Re-enable CMM virtualization if CMMA is available and
3288 * CMM has been used.
3290 if ((vcpu->kvm->arch.use_cmma) &&
3291 (vcpu->kvm->mm->context.uses_cmm))
3292 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3296 /* nothing to do, just clear the request */
3297 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3298 /* we left the vsie handler, nothing to do, just clear the request */
3299 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
/* Set the guest TOD clock: compute the epoch (and, with the multiple-epoch
 * facility 139, the epoch index) as the difference between the requested
 * guest TOD and the current host TOD, then propagate it to every vcpu while
 * all of them are blocked out of SIE. */
3304 void kvm_s390_set_tod_clock(struct kvm *kvm,
3305 const struct kvm_s390_vm_tod_clock *gtod)
3307 struct kvm_vcpu *vcpu;
3308 struct kvm_s390_tod_clock_ext htod;
3311 mutex_lock(&kvm->lock);
3314 get_tod_clock_ext((char *)&htod);
3316 kvm->arch.epoch = gtod->tod - htod.tod;
3318 if (test_kvm_facility(kvm, 139)) {
3319 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
/* Borrow from the epoch index when the 64-bit epoch subtraction wrapped. */
3320 if (kvm->arch.epoch > gtod->tod)
3321 kvm->arch.epdx -= 1;
/* All vcpus must be out of SIE while their epoch fields change. */
3324 kvm_s390_vcpu_block_all(kvm);
3325 kvm_for_each_vcpu(i, vcpu, kvm) {
3326 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3327 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3330 kvm_s390_vcpu_unblock_all(kvm);
3332 mutex_unlock(&kvm->lock);
3336 * kvm_arch_fault_in_page - fault-in guest page if necessary
3337 * @vcpu: The corresponding virtual cpu
3338 * @gpa: Guest physical address
3339 * @writable: Whether the page should be writable or not
3341 * Make sure that a guest page has been faulted-in on the host.
3343 * Return: Zero on success, negative error code otherwise.
3345 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3347 return gmap_fault(vcpu->arch.gmap, gpa,
3348 writable ? FAULT_FLAG_WRITE : 0);
/* Inject a pfault notification for @token: an INIT external interrupt into
 * the vcpu when the fault starts, or a DONE interrupt into the VM-wide
 * floating queue when it completes (the start_token branch selector is on
 * a line elided from this view). */
3351 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3352 unsigned long token)
3354 struct kvm_s390_interrupt inti;
3355 struct kvm_s390_irq irq;
3358 irq.u.ext.ext_params2 = token;
3359 irq.type = KVM_S390_INT_PFAULT_INIT;
3360 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3362 inti.type = KVM_S390_INT_PFAULT_DONE;
3363 inti.parm64 = token;
3364 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/* async-pf hook: tell the guest the page is not yet present (PFAULT INIT). */
3368 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3369 struct kvm_async_pf *work)
3371 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3372 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/* async-pf hook: tell the guest the page is now present (PFAULT DONE). */
3375 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3376 struct kvm_async_pf *work)
3378 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3379 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
/* async-pf hook: intentionally empty on s390 (see comment below). */
3382 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3383 struct kvm_async_pf *work)
3385 /* s390 will always inject the page directly */
/* Always report injectable so the async-pf completion cleanup runs. */
3388 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3391 * s390 will always inject the page directly,
3392 * but we still want check_async_completion to cleanup
/* Decide whether the current host fault can be handled asynchronously via
 * the pfault mechanism: requires a valid token, a PSW matching the guest's
 * pfault compare/select mask, external interrupts enabled, no pending irq,
 * the service-signal submask set, and pfault enabled on the gmap.
 * Returns non-zero when async handling was set up (exact return-value
 * conventions for the early exits are on lines elided from this view). */
3397 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3400 struct kvm_arch_async_pf arch;
3403 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3405 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3406 vcpu->arch.pfault_compare)
3408 if (psw_extint_disabled(vcpu))
3410 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3412 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3414 if (!vcpu->arch.gmap->pfault_enabled)
/* Resolve the faulting guest address to a host address, then read the
 * 8-byte token from guest real storage. */
3417 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3418 hva += current->thread.gmap_addr & ~PAGE_MASK;
3419 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3422 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/* Everything that must happen immediately before entering SIE: async-pf
 * completion housekeeping, syncing gprs 14/15 into the SIE block, interrupt
 * delivery, request processing, and guest-debug PER patching. Returns 0 to
 * proceed with SIE entry, non-zero to bail out to the run loop. */
3426 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3431 * On s390 notifications for arriving pages will be delivered directly
3432 * to the guest but the house keeping for completed pfaults is
3433 * handled outside the worker.
3435 kvm_check_async_pf_completion(vcpu);
/* gg14/gg15 mirror guest gprs 14/15 for use by the SIE entry code. */
3437 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3438 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
/* A pending host machine check must be handled before entering the guest. */
3443 if (test_cpu_flag(CIF_MCCK_PENDING))
3446 if (!kvm_is_ucontrol(vcpu->kvm)) {
3447 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3452 rc = kvm_s390_handle_requests(vcpu);
3456 if (guestdbg_enabled(vcpu)) {
3457 kvm_s390_backup_guest_per_regs(vcpu);
3458 kvm_s390_patch_guest_per_regs(vcpu);
3461 vcpu->arch.sie_block->icptcode = 0;
3462 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3463 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3464 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/* The SIE instruction itself faulted on a guest access: inject an addressing
 * exception into the guest, forwarding the PSW past the faulting instruction
 * (DAT exceptions are nullifying, so the PSW still points at it). */
3469 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3471 struct kvm_s390_pgm_info pgm_info = {
3472 .code = PGM_ADDRESSING,
3477 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3478 trace_kvm_s390_sie_fault(vcpu);
3481 * We want to inject an addressing exception, which is defined as a
3482 * suppressing or terminating exception. However, since we came here
3483 * by a DAT access exception, the PSW still points to the faulting
3484 * instruction since DAT exceptions are nullifying. So we've got
3485 * to look up the current opcode to get the length of the instruction
3486 * to be able to forward the PSW.
3488 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3489 ilen = insn_length(opcode);
3493 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3494 * Forward by arbitrary ilc, injection will take care of
3495 * nullification if necessary.
3497 pgm_info = vcpu->arch.pgm;
/* Mark the instruction-length code valid so injection forwards correctly. */
3500 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3501 kvm_s390_forward_psw(vcpu, ilen);
3502 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/* Triage the SIE exit: restore debug/gpr state, then dispatch on the exit
 * reason — machine check reinjection, intercept handling, signal/null exits,
 * ucontrol translation exits, async/sync guest page faults, or a fault in
 * the SIE instruction itself. */
3505 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3507 struct mcck_volatile_info *mcck_info;
3508 struct sie_page *sie_page;
3510 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3511 vcpu->arch.sie_block->icptcode);
3512 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3514 if (guestdbg_enabled(vcpu))
3515 kvm_s390_restore_guest_per_regs(vcpu);
/* Copy gprs 14/15 back from the SIE scratch fields. */
3517 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3518 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
/* -EINTR from sie64a signals a host machine check to reinject into the guest;
 * the volatile mcck info lives in the vcpu's sie_page. */
3520 if (exit_reason == -EINTR) {
3521 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3522 sie_page = container_of(vcpu->arch.sie_block,
3523 struct sie_page, sie_block);
3524 mcck_info = &sie_page->mcck_info;
3525 kvm_s390_reinject_machine_check(vcpu, mcck_info);
/* A positive icptcode means SIE intercepted a guest event; unhandled
 * intercepts are forwarded to userspace as KVM_EXIT_S390_SIEIC. */
3529 if (vcpu->arch.sie_block->icptcode > 0) {
3530 int rc = kvm_handle_sie_intercept(vcpu);
3532 if (rc != -EOPNOTSUPP)
3534 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3535 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3536 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3537 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3539 } else if (exit_reason != -EFAULT) {
3540 vcpu->stat.exit_null++;
/* ucontrol VMs resolve their own faults: report the translation exception. */
3542 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3543 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3544 vcpu->run->s390_ucontrol.trans_exc_code =
3545 current->thread.gmap_addr;
3546 vcpu->run->s390_ucontrol.pgm_code = 0x10;
/* Guest page fault: try async pfault first, else fault the page in now. */
3548 } else if (current->thread.gmap_pfault) {
3549 trace_kvm_s390_major_guest_pfault(vcpu);
3550 current->thread.gmap_pfault = 0;
3551 if (kvm_arch_setup_async_pf(vcpu))
3553 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3555 return vcpu_post_run_fault_in_sie(vcpu);
/* The inner run loop: pre-run checks, drop srcu and enter SIE with irqs off
 * and guest-context accounting, then post-run triage — until a signal, a
 * guest-debug exit, or a non-zero rc breaks the loop. */
3558 static int __vcpu_run(struct kvm_vcpu *vcpu)
3560 int rc, exit_reason;
3563 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3564 * ning the guest), so that memslots (and other stuff) are protected
3566 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3569 rc = vcpu_pre_run(vcpu);
/* srcu must not be held across sie64a — the guest may run indefinitely. */
3573 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3575 * As PF_VCPU will be used in fault handler, between
3576 * guest_enter and guest_exit should be no uaccess.
3578 local_irq_disable();
3579 guest_enter_irqoff();
/* Host-side cpu timer accounting pauses while the guest runs on the timer. */
3580 __disable_cpu_timer_accounting(vcpu);
3582 exit_reason = sie64a(vcpu->arch.sie_block,
3583 vcpu->run->s.regs.gprs);
3584 local_irq_disable();
3585 __enable_cpu_timer_accounting(vcpu);
3586 guest_exit_irqoff();
3588 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3590 rc = vcpu_post_run(vcpu, exit_reason);
3591 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3593 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/* Copy userspace-dirty state from kvm_run into the vcpu before running:
 * PSW, prefix, control registers, arch0 timer state, pfault triple, lazy
 * RI/GS enablement, BPBC — then swap host acrs/fpu/gs state for the guest's. */
3597 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3599 struct runtime_instr_cb *riccb;
3602 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3603 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3604 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3605 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3606 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3607 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3608 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3609 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3610 /* some control register changes require a tlb flush */
3611 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3613 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3614 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3615 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3616 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3617 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3618 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3620 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3621 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3622 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3623 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3624 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3625 kvm_clear_async_pf_completion_queue(vcpu);
3628 * If userspace sets the riccb (e.g. after migration) to a valid state,
3629 * we should enable RI here instead of doing the lazy enablement.
3631 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3632 test_kvm_facility(vcpu->kvm, 64) &&
3634 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3635 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3636 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3639 * If userspace sets the gscb (e.g. after migration) to non-zero,
3640 * we should enable GS here instead of doing the lazy enablement.
3642 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3643 test_kvm_facility(vcpu->kvm, 133) &&
3645 !vcpu->arch.gs_enabled) {
3646 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3647 vcpu->arch.sie_block->ecb |= ECB_GS;
3648 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3649 vcpu->arch.gs_enabled = 1;
3651 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3652 test_kvm_facility(vcpu->kvm, 82)) {
3653 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3654 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
/* From here on: save host register state and load the guest's (undone in
 * store_regs()). */
3656 save_access_regs(vcpu->arch.host_acrs);
3657 restore_access_regs(vcpu->run->s.regs.acrs);
3658 /* save host (userspace) fprs/vrs */
3660 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3661 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
/* Point the lazy-FPU state at the guest vrs or fprs (the MACHINE_HAS_VX
 * selector for these two assignments is on a line elided from this view). */
3663 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3665 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3666 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3667 if (test_fp_ctl(current->thread.fpu.fpc))
3668 /* User space provided an invalid FPC, let's clear it */
3669 current->thread.fpu.fpc = 0;
3670 if (MACHINE_HAS_GS) {
/* CR2 bit 4 enables the guarded-storage facility control. */
3672 __ctl_set_bit(2, 4);
3673 if (current->thread.gs_cb) {
3674 vcpu->arch.host_gscb = current->thread.gs_cb;
3675 save_gs_cb(vcpu->arch.host_gscb);
3677 if (vcpu->arch.gs_enabled) {
3678 current->thread.gs_cb = (struct gs_cb *)
3679 &vcpu->run->s.regs.gscb;
3680 restore_gs_cb(current->thread.gs_cb);
3684 /* SIE will load etoken directly from SDNX and therefore kvm_run */
3686 kvm_run->kvm_dirty_regs = 0;
/* Mirror of sync_regs(): copy guest state back into kvm_run for userspace
 * and restore the host's acrs/fpu/guarded-storage context. */
3689 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3691 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3692 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3693 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3694 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3695 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3696 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3697 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3698 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3699 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3700 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3701 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3702 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3703 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3704 save_access_regs(vcpu->run->s.regs.acrs);
3705 restore_access_regs(vcpu->arch.host_acrs);
3706 /* Save guest register state */
3708 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3709 /* Restore will be done lazily at return */
3710 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3711 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3712 if (MACHINE_HAS_GS) {
3713 __ctl_set_bit(2, 4);
3714 if (vcpu->arch.gs_enabled)
3715 save_gs_cb(current->thread.gs_cb);
/* Hand the guarded-storage control block back to the host task, clearing
 * the facility control bit when the host had none. */
3717 current->thread.gs_cb = vcpu->arch.host_gscb;
3718 restore_gs_cb(vcpu->arch.host_gscb);
3720 if (!vcpu->arch.host_gscb)
3721 __ctl_clear_bit(2, 4);
3722 vcpu->arch.host_gscb = NULL;
3724 /* SIE will save etoken directly into SDNX and therefore kvm_run */
/* Top-level KVM_RUN: handle immediate-exit and pending debug exits, activate
 * the guest sigset, auto-start the vcpu (unless userspace controls cpu
 * state), sync registers, run the inner loop, and translate the result into
 * an exit_reason for userspace. */
3727 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3731 if (kvm_run->immediate_exit)
3736 if (guestdbg_exit_pending(vcpu)) {
3737 kvm_s390_prepare_debug_exit(vcpu);
3742 kvm_sigset_activate(vcpu);
3744 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3745 kvm_s390_vcpu_start(vcpu);
/* With userspace-controlled cpu state, running a stopped vcpu is a bug. */
3746 } else if (is_vcpu_stopped(vcpu)) {
3747 pr_err_ratelimited("can't run stopped vcpu %d\n",
3753 sync_regs(vcpu, kvm_run);
3754 enable_cpu_timer_accounting(vcpu);
3757 rc = __vcpu_run(vcpu);
3759 if (signal_pending(current) && !rc) {
3760 kvm_run->exit_reason = KVM_EXIT_INTR;
3764 if (guestdbg_exit_pending(vcpu) && !rc) {
3765 kvm_s390_prepare_debug_exit(vcpu);
3769 if (rc == -EREMOTE) {
3770 /* userspace support is needed, kvm_run has been prepared */
3774 disable_cpu_timer_accounting(vcpu);
3775 store_regs(vcpu, kvm_run);
3777 kvm_sigset_deactivate(vcpu);
3779 vcpu->stat.exit_userspace++;
3786 * store status at address
3787 * we use have two special cases:
3788 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3789 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/* Architected STORE STATUS: write the vcpu's registers, PSW, prefix, timers
 * and control registers into the save area at @gpa. Any failed write makes
 * the whole operation fail with -EFAULT. */
3791 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3793 unsigned char archmode = 1;
3794 freg_t fprs[NUM_FPRS];
3799 px = kvm_s390_get_prefix(vcpu);
/* The two magic gpa values also store the architected-mode byte at 163. */
3800 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3801 if (write_guest_abs(vcpu, 163, &archmode, 1))
3804 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3805 if (write_guest_real(vcpu, 163, &archmode, 1))
/* Normalize gpa so the __LC_* offsets below address the save area directly. */
3809 gpa -= __LC_FPREGS_SAVE_AREA;
3811 /* manually convert vector registers if necessary */
3812 if (MACHINE_HAS_VX) {
3813 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3814 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3817 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3818 vcpu->run->s.regs.fprs, 128);
3820 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3821 vcpu->run->s.regs.gprs, 128);
3822 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3823 &vcpu->arch.sie_block->gpsw, 16);
3824 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3826 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3827 &vcpu->run->s.regs.fpc, 4);
3828 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3829 &vcpu->arch.sie_block->todpr, 4);
3830 cputm = kvm_s390_get_cpu_timer(vcpu);
3831 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
/* The clock comparator is stored shifted right by 8 per the architecture. */
3833 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3834 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3836 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3837 &vcpu->run->s.regs.acrs, 64);
3838 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3839 &vcpu->arch.sie_block->gcr, 128);
3840 return rc ? -EFAULT : 0;
/*
 * Refresh the lazily-switched FPC and access registers into vcpu->run,
 * then write the status save area via the _unloaded variant.
 */
3843 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3846 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3847 * switch in the run ioctl. Let's update our copies before we save
3848 * it into the save area
3851 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3852 save_access_regs(vcpu->run->s.regs.acrs);
3854 return kvm_s390_store_status_unloaded(vcpu, addr);
/* Cancel any pending ENABLE_IBS request and queue DISABLE_IBS instead. */
3857 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3859 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3860 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
/* Disable the IBS facility on every vcpu of the VM. */
3863 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3866 struct kvm_vcpu *vcpu;
3868 kvm_for_each_vcpu(i, vcpu, kvm) {
3869 __disable_ibs_on_vcpu(vcpu);
/* Cancel any pending DISABLE_IBS request and queue ENABLE_IBS instead. */
3873 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3877 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3878 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
/*
 * Move a stopped vcpu into the started state under the VM-wide
 * start_stop_lock; a lone running vcpu gets the IBS speed-up.
 */
3881 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3883 int i, online_vcpus, started_vcpus = 0;
3885 if (!is_vcpu_stopped(vcpu))
3888 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3889 /* Only one cpu at a time may enter/leave the STOPPED state. */
3890 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3891 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
/* count how many other vcpus are already out of the STOPPED state */
3893 for (i = 0; i < online_vcpus; i++) {
3894 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3898 if (started_vcpus == 0) {
3899 /* we're the only active VCPU -> speed it up */
3900 __enable_ibs_on_vcpu(vcpu);
3901 } else if (started_vcpus == 1) {
3903 * As we are starting a second VCPU, we have to disable
3904 * the IBS facility on all VCPUs to remove potentially
3905 * outstanding ENABLE requests.
3907 __disable_ibs_on_all_vcpus(vcpu->kvm);
3910 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3912 * Another VCPU might have used IBS while we were offline.
3913 * Let's play safe and flush the VCPU at startup.
3915 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3916 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Move a running vcpu into the STOPPED state under start_stop_lock;
 * if exactly one vcpu remains running afterwards, re-enable IBS for it.
 */
3920 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3922 int i, online_vcpus, started_vcpus = 0;
3923 struct kvm_vcpu *started_vcpu = NULL;
3925 if (is_vcpu_stopped(vcpu))
3928 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3929 /* Only one cpu at a time may enter/leave the STOPPED state. */
3930 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3931 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3933 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3934 kvm_s390_clear_stop_irq(vcpu);
3936 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3937 __disable_ibs_on_vcpu(vcpu);
/* find which vcpus are still running now that this one has stopped */
3939 for (i = 0; i < online_vcpus; i++) {
3940 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3942 started_vcpu = vcpu->kvm->vcpus[i];
3946 if (started_vcpus == 1) {
3948 * As we only have one VCPU left, we want to enable the
3949 * IBS facility for that VCPU to speed it up.
3951 __enable_ibs_on_vcpu(started_vcpu);
3954 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/* Handle KVM_ENABLE_CAP for a vcpu; only the CSS case is visible here. */
3958 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3959 struct kvm_enable_cap *cap)
3967 case KVM_CAP_S390_CSS_SUPPORT:
/* flip the VM-wide flag only once, logging and tracing the transition */
3968 if (!vcpu->kvm->arch.css_support) {
3969 vcpu->kvm->arch.css_support = 1;
3970 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3971 trace_kvm_s390_enable_css(vcpu->kvm);
/*
 * Handle KVM_S390_MEM_OP: copy between userspace and guest logical
 * memory through a vmalloc'd bounce buffer, or (with F_CHECK_ONLY)
 * merely test whether the guest range is accessible.
 */
3982 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3983 struct kvm_s390_mem_op *mop)
3985 void __user *uaddr = (void __user *)mop->buf;
3986 void *tmpbuf = NULL;
3988 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3989 | KVM_S390_MEMOP_F_CHECK_ONLY;
3991 if (mop->flags & ~supported_flags)
3994 if (mop->size > MEM_OP_MAX_SIZE)
/* a bounce buffer is only needed when data actually moves */
3997 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3998 tmpbuf = vmalloc(mop->size);
4003 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4006 case KVM_S390_MEMOP_LOGICAL_READ:
4007 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4008 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4009 mop->size, GACC_FETCH);
4012 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4014 if (copy_to_user(uaddr, tmpbuf, mop->size))
4018 case KVM_S390_MEMOP_LOGICAL_WRITE:
4019 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4020 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4021 mop->size, GACC_STORE);
4024 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4028 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4034 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
/* r > 0 carries a guest access exception; inject it when requested */
4036 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4037 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
/*
 * vcpu ioctls served on the async path: interrupt injection. Both the
 * modern KVM_S390_IRQ and the legacy KVM_S390_INTERRUPT interface end
 * up in kvm_s390_inject_vcpu().
 */
4043 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4044 unsigned int ioctl, unsigned long arg)
4046 struct kvm_vcpu *vcpu = filp->private_data;
4047 void __user *argp = (void __user *)arg;
4050 case KVM_S390_IRQ: {
4051 struct kvm_s390_irq s390irq;
4053 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4055 return kvm_s390_inject_vcpu(vcpu, &s390irq);
/* legacy interface: convert kvm_s390_interrupt to kvm_s390_irq first */
4057 case KVM_S390_INTERRUPT: {
4058 struct kvm_s390_interrupt s390int;
4059 struct kvm_s390_irq s390irq;
4061 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4063 if (s390int_to_s390irq(&s390int, &s390irq))
4065 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4068 return -ENOIOCTLCMD;
/*
 * Main vcpu ioctl dispatcher: copies each argument struct in from
 * userspace and routes to the matching handler.
 */
4071 long kvm_arch_vcpu_ioctl(struct file *filp,
4072 unsigned int ioctl, unsigned long arg)
4074 struct kvm_vcpu *vcpu = filp->private_data;
4075 void __user *argp = (void __user *)arg;
4082 case KVM_S390_STORE_STATUS:
4083 idx = srcu_read_lock(&vcpu->kvm->srcu);
4084 r = kvm_s390_vcpu_store_status(vcpu, arg);
4085 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4087 case KVM_S390_SET_INITIAL_PSW: {
4091 if (copy_from_user(&psw, argp, sizeof(psw)))
4093 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4096 case KVM_S390_INITIAL_RESET:
4097 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4099 case KVM_SET_ONE_REG:
4100 case KVM_GET_ONE_REG: {
4101 struct kvm_one_reg reg;
/* fixed: "&reg" had been corrupted into the single character '®'
 * (a collapsed "&reg;" HTML entity) on the three lines below */
4103 if (copy_from_user(&reg, argp, sizeof(reg)))
4105 if (ioctl == KVM_SET_ONE_REG)
4106 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4108 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4111 #ifdef CONFIG_KVM_S390_UCONTROL
4112 case KVM_S390_UCAS_MAP: {
4113 struct kvm_s390_ucas_mapping ucasmap;
4115 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4120 if (!kvm_is_ucontrol(vcpu->kvm)) {
4125 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4126 ucasmap.vcpu_addr, ucasmap.length);
4129 case KVM_S390_UCAS_UNMAP: {
4130 struct kvm_s390_ucas_mapping ucasmap;
4132 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4137 if (!kvm_is_ucontrol(vcpu->kvm)) {
4142 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4147 case KVM_S390_VCPU_FAULT: {
4148 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4151 case KVM_ENABLE_CAP:
4153 struct kvm_enable_cap cap;
4155 if (copy_from_user(&cap, argp, sizeof(cap)))
4157 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4160 case KVM_S390_MEM_OP: {
4161 struct kvm_s390_mem_op mem_op;
4163 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4164 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4169 case KVM_S390_SET_IRQ_STATE: {
4170 struct kvm_s390_irq_state irq_state;
4173 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
/* reject zero-length, oversized, or non-multiple-of-irq-size buffers */
4175 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4176 irq_state.len == 0 ||
4177 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4181 /* do not use irq_state.flags, it will break old QEMUs */
4182 r = kvm_s390_set_irq_state(vcpu,
4183 (void __user *) irq_state.buf,
4187 case KVM_S390_GET_IRQ_STATE: {
4188 struct kvm_s390_irq_state irq_state;
4191 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4193 if (irq_state.len == 0) {
4197 /* do not use irq_state.flags, it will break old QEMUs */
4198 r = kvm_s390_get_irq_state(vcpu,
4199 (__u8 __user *) irq_state.buf,
/*
 * mmap fault handler for the vcpu fd: ucontrol VMs may map the SIE
 * control block page at KVM_S390_SIE_PAGE_OFFSET; anything else SIGBUSes.
 */
4211 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4213 #ifdef CONFIG_KVM_S390_UCONTROL
4214 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4215 && (kvm_is_ucontrol(vcpu->kvm))) {
4216 vmf->page = virt_to_page(vcpu->arch.sie_block);
/* take a page reference; the core mm releases it with the mapping */
4217 get_page(vmf->page);
4221 return VM_FAULT_SIGBUS;
/* No per-slot metadata is needed on s390 (body elided in this dump). */
4224 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4225 unsigned long npages)
4230 /* Section: memory related */
4231 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4232 struct kvm_memory_slot *memslot,
4233 const struct kvm_userspace_memory_region *mem,
4234 enum kvm_mr_change change)
4236 /* A few sanity checks. We can have memory slots which have to be
4237 located/ended at a segment boundary (1MB). The memory in userland is
4238 ok to be fragmented into various different vmas. It is okay to mmap()
4239 and munmap() stuff in this slot after doing this call at any time */
/* userspace address must be 1MB (segment) aligned */
4241 if (mem->userspace_addr & 0xffffful)
/* the slot size must be a whole number of segments as well */
4244 if (mem->memory_size & 0xffffful)
/* the slot must fit below the configured guest memory limit */
4247 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
/* Map the committed slot into the gmap unless nothing relevant changed. */
4253 void kvm_arch_commit_memory_region(struct kvm *kvm,
4254 const struct kvm_userspace_memory_region *mem,
4255 const struct kvm_memory_slot *old,
4256 const struct kvm_memory_slot *new,
4257 enum kvm_mr_change change)
4261 /* If the basics of the memslot do not change, we do not want
4262 * to update the gmap. Every update causes several unnecessary
4263 * segment translation exceptions. This is usually handled just
4264 * fine by the normal fault handler + gmap, but it will also
4265 * cause faults on the prefix page of running guest CPUs.
4267 if (old->userspace_addr == mem->userspace_addr &&
4268 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4269 old->npages * PAGE_SIZE == mem->memory_size)
4272 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4273 mem->guest_phys_addr, mem->memory_size);
/* commit cannot fail upward (void return); all we can do is warn */
4275 pr_warn("failed to commit memory region\n");
/*
 * Extract the 2-bit field i of sclp.hmfai and widen it into a shift of
 * the 48-bit facility mask for facility word i.
 * NOTE(review): exact hmfai field semantics come from the SCLP
 * interface documentation — confirm before relying on this comment.
 */
4279 static inline unsigned long nonhyp_mask(int i)
4281 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4283 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
/* Reset the wakeup hint once the vcpu leaves the blocked state. */
4286 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4288 vcpu->valid_wakeup = false;
/*
 * Module init: require the SIEF2 facility, refuse the unsupported
 * nested + hpage combination, seed kvm_s390_fac_base from the host
 * facility list, then register with the KVM core.
 */
4291 static int __init kvm_s390_init(void)
4295 if (!sclp.has_sief2) {
4296 pr_info("SIE not available\n")
4300 if (nested && hpage) {
4301 pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
/* expose only host facilities that non-hypervisor guests may use */
4305 for (i = 0; i < 16; i++)
4306 kvm_s390_fac_base[i] |=
4307 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4309 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
/* Module teardown; the body is elided in this dump — presumably just
 * unregisters from the KVM core. TODO confirm against the full file. */
4312 static void __exit kvm_s390_exit(void)
4317 module_init(kvm_s390_init);
4318 module_exit(kvm_s390_exit);
4321 * Enable autoloading of the kvm module.
4322 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4323 * since x86 takes a different approach.
4325 #include <linux/miscdevice.h>
4326 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4327 MODULE_ALIAS("devname:kvm");