// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

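/*
 * Each entry below pairs a debugfs file name with the offset of the
 * corresponding counter in struct kvm_vcpu resp. struct kvm (VCPU_STAT vs.
 * VM_STAT); the generic KVM debugfs code walks this table to expose the
 * counters.
 */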
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_io_request", VCPU_STAT(exit_io_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_ckc", VCPU_STAT(deliver_ckc) },
        { "deliver_cputm", VCPU_STAT(deliver_cputm) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio", VCPU_STAT(deliver_virtio) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program", VCPU_STAT(deliver_program) },
        { "deliver_io", VCPU_STAT(deliver_io) },
        { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "inject_ckc", VCPU_STAT(inject_ckc) },
        { "inject_cputm", VCPU_STAT(inject_cputm) },
        { "inject_external_call", VCPU_STAT(inject_external_call) },
        { "inject_float_mchk", VM_STAT(inject_float_mchk) },
        { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
        { "inject_io", VM_STAT(inject_io) },
        { "inject_mchk", VCPU_STAT(inject_mchk) },
        { "inject_pfault_done", VM_STAT(inject_pfault_done) },
        { "inject_program", VCPU_STAT(inject_program) },
        { "inject_restart", VCPU_STAT(inject_restart) },
        { "inject_service_signal", VM_STAT(inject_service_signal) },
        { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
        { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
        { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
        { "inject_virtio", VM_STAT(inject_virtio) },
        { "instruction_epsw", VCPU_STAT(instruction_epsw) },
        { "instruction_gs", VCPU_STAT(instruction_gs) },
        { "instruction_io_other", VCPU_STAT(instruction_io_other) },
        { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
        { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_ptff", VCPU_STAT(instruction_ptff) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_sck", VCPU_STAT(instruction_sck) },
        { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_iske", VCPU_STAT(instruction_iske) },
        { "instruction_ri", VCPU_STAT(instruction_ri) },
        { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
        { "instruction_sske", VCPU_STAT(instruction_sske) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tb", VCPU_STAT(instruction_tb) },
        { "instruction_tpi", VCPU_STAT(instruction_tpi) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_tsch", VCPU_STAT(instruction_tsch) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "instruction_diag_10", VCPU_STAT(diagnose_10) },
        { "instruction_diag_44", VCPU_STAT(diagnose_44) },
        { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
        { "instruction_diag_258", VCPU_STAT(diagnose_258) },
        { "instruction_diag_308", VCPU_STAT(diagnose_308) },
        { "instruction_diag_500", VCPU_STAT(diagnose_500) },
        { "instruction_diag_other", VCPU_STAT(diagnose_other) },
        { NULL }
};

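/*
 * The layout below mirrors the 16 bytes stored by get_tod_clock_ext()
 * (STORE CLOCK EXTENDED): one byte of epoch index followed by the 8-byte
 * TOD value.
 */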
struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, it will require changes to the code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines the default mask for facilities. Consists
 * of the defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
                sizeof(S390_lowcore.stfle_fac_list));

        return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);

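/*
 * The guest epoch is a signed 64-bit offset the machine adds to the host
 * TOD; with the multiple-epoch facility, epdx extends it by another 8 bits,
 * in effect a 72-bit epoch. The helper below adjusts this quantity by
 * -delta, detecting the carry into epdx via the unsigned wraparound check.
 */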
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
        u8 delta_idx = 0;

        /*
         * The TOD jumps by delta; we have to compensate for this by adding
         * -delta to the epoch.
         */
        delta = -delta;

        /* sign-extension - we're adding to signed values below */
        if ((s64)delta < 0)
                delta_idx = -1;

        scb->epoch += delta;
        if (scb->ecd & ECD_MEF) {
                scb->epdx += delta_idx;
                if (scb->epoch < delta)
                        scb->epdx += 1;
        }
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

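/*
 * With bit 0x100 set in general register 0, PERFORM LOCKED OPERATION does
 * not perform an operation but only tests whether the function code in the
 * low byte of r0 is available; condition code 0 means it is.
 */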
static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
         * pages be detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        int rc;

        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
                rc = -ENOMEM;
                goto out_debug_unreg;
        }

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
        if (rc) {
                pr_err("Failed to register FLIC rc=%d\n", rc);
                goto out_debug_unreg;
        }
        return 0;

out_debug_unreg:
        debug_unregister(kvm_s390_dbf);
        return rc;
}

void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
                r = 1;
                break;
        case KVM_CAP_S390_HPAGE_1M:
                r = 0;
                if (hpage && !kvm_is_ucontrol(kvm))
                        r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        default:
                r = 0;
        }
        return r;
}
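/*
 * A minimal sketch of how userspace probes the capabilities reported above
 * (vm_fd is assumed to be an open KVM VM file descriptor):
 *
 *      int ret = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *      if (ret > 0)
 *              ... ret is the maximum MEM_OP transfer size, see above ...
 */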
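/*
 * Dirty state is tracked per segment of _PAGE_ENTRIES page-table entries
 * (256 4k pages, i.e. 1 MB of guest memory), so the loop below queries the
 * gmap one segment at a time and transfers the bits into the memslot's
 * dirty bitmap.
 */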
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot)
{
        int i;
        gfn_t cur_gfn, last_gfn;
        unsigned long gaddr, vmaddr;
        struct gmap *gmap = kvm->arch.gmap;
        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

        /* Loop over all guest segments */
        cur_gfn = memslot->base_gfn;
        last_gfn = memslot->base_gfn + memslot->npages;
        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
                gaddr = gfn_to_gpa(cur_gfn);
                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
                if (kvm_is_error_hva(vmaddr))
                        continue;

                bitmap_zero(bitmap, _PAGE_ENTRIES);
                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
                for (i = 0; i < _PAGE_ENTRIES; i++) {
                        if (test_bit(i, bitmap))
                                mark_page_dirty(kvm, cur_gfn + i);
                }

                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        slots = kvm_memslots(kvm);
        memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_HPAGE_1M:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
                        r = -EINVAL;
                else {
                        r = 0;
                        down_write(&kvm->mm->mmap_sem);
                        kvm->mm->context.allow_gmap_hpage_1m = 1;
                        up_write(&kvm->mm->mmap_sem);
                        /*
                         * We might have to create fake 4k page
                         * tables. To prevent the hardware from working
                         * on stale PGSTEs, we emulate these instructions.
                         */
                        kvm->arch.use_skf = 0;
                        kvm->arch.use_pfmfi = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}
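/*
 * A minimal sketch of the matching userspace side (hypothetical; vm_fd is
 * an open VM file descriptor):
 *
 *      struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *      ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */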

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        ret = -EBUSY;
                else if (kvm->mm->context.allow_gmap_hpage_1m)
                        ret = -EINVAL;
                else {
                        kvm->arch.use_cmma = 1;
                        /* Not compatible with cmma. */
                        kvm->arch.use_pfmfi = 0;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        int i;

        kvm_s390_vcpu_block_all(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                /* recreate the shadow crycb by leaving the VSIE handler */
                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
        }

        kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 1;
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 0;
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_s390_vcpu_crypto_reset_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_memory_slot *ms;
        struct kvm_memslots *slots;
        unsigned long ram_pages = 0;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_mode)
                return 0;
        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        if (!kvm->arch.use_cmma) {
                kvm->arch.migration_mode = 1;
                return 0;
        }
        /* mark all the pages in active slots as dirty */
        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                ms = slots->memslots + slotnr;
                /*
                 * The second half of the bitmap is only used on x86,
                 * and would be wasted otherwise, so we put it to good
                 * use here to keep track of the state of the storage
                 * attributes.
                 */
                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
                ram_pages += ms->npages;
        }
        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
        kvm->arch.migration_mode = 1;
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        /* migration mode already disabled */
        if (!kvm->arch.migration_mode)
                return 0;
        kvm->arch.migration_mode = 0;
        if (kvm->arch.use_cmma)
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int res = -ENXIO;

        mutex_lock(&kvm->slots_lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                res = kvm_s390_vm_start_migration(kvm);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->slots_lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = kvm->arch.migration_mode;

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
        kvm_s390_set_tod_clock(kvm, &gtod);

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod = { 0 };

        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
                           sizeof(gtod.tod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, &gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
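/*
 * With the multiple-epoch facility (facility 139), the guest view of the
 * TOD gains an 8-bit epoch index; the carry out of the 64-bit TOD sum
 * below is detected via the unsigned wraparound check, mirroring
 * kvm_clock_sync_scb() above.
 */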
static void kvm_s390_get_tod_clock(struct kvm *kvm,
                                   struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_s390_tod_clock_ext htod;

        preempt_disable();

        get_tod_clock_ext((char *)&htod);

        gtod->tod = htod.tod + kvm->arch.epoch;
        gtod->epoch_idx = 0;
        if (test_kvm_facility(kvm, 139)) {
                gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
                if (gtod->tod < htod.tod)
                        gtod->epoch_idx += 1;
        }

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));
        kvm_s390_get_tod_clock(kvm, &gtod);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
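/*
 * sclp.ibc packs the lowest supported IBC value in bits 16-27 and the
 * highest unblocked one in bits 0-11; the requested proc->ibc is clamped
 * into that range below.
 */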
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                         kvm->arch.model.ibc,
                         kvm->arch.model.cpuid);
                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                         kvm->arch.model.fac_list[0],
                         kvm->arch.model.fac_list[1],
                         kvm->arch.model.fac_list[2]);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}
1230
1231 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1232                                        struct kvm_device_attr *attr)
1233 {
1234         struct kvm_s390_vm_cpu_feat data;
1235
1236         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1237                 return -EFAULT;
1238         if (!bitmap_subset((unsigned long *) data.feat,
1239                            kvm_s390_available_cpu_feat,
1240                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1241                 return -EINVAL;
1242
1243         mutex_lock(&kvm->lock);
1244         if (kvm->created_vcpus) {
1245                 mutex_unlock(&kvm->lock);
1246                 return -EBUSY;
1247         }
1248         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1249                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1250         mutex_unlock(&kvm->lock);
1251         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1252                          data.feat[0],
1253                          data.feat[1],
1254                          data.feat[2]);
1255         return 0;
1256 }
1257
1258 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1259                                           struct kvm_device_attr *attr)
1260 {
1261         /*
1262          * Once supported by kernel + hw, we have to store the subfunctions
1263          * in kvm->arch and remember that user space configured them.
1264          */
1265         return -ENXIO;
1266 }
1267
1268 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1269 {
1270         int ret = -ENXIO;
1271
1272         switch (attr->attr) {
1273         case KVM_S390_VM_CPU_PROCESSOR:
1274                 ret = kvm_s390_set_processor(kvm, attr);
1275                 break;
1276         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1277                 ret = kvm_s390_set_processor_feat(kvm, attr);
1278                 break;
1279         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1280                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1281                 break;
1282         }
1283         return ret;
1284 }
1285
1286 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1287 {
1288         struct kvm_s390_vm_cpu_processor *proc;
1289         int ret = 0;
1290
1291         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1292         if (!proc) {
1293                 ret = -ENOMEM;
1294                 goto out;
1295         }
1296         proc->cpuid = kvm->arch.model.cpuid;
1297         proc->ibc = kvm->arch.model.ibc;
1298         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1299                S390_ARCH_FAC_LIST_SIZE_BYTE);
1300         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1301                  kvm->arch.model.ibc,
1302                  kvm->arch.model.cpuid);
1303         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1304                  kvm->arch.model.fac_list[0],
1305                  kvm->arch.model.fac_list[1],
1306                  kvm->arch.model.fac_list[2]);
1307         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1308                 ret = -EFAULT;
1309         kfree(proc);
1310 out:
1311         return ret;
1312 }
1313
1314 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1315 {
1316         struct kvm_s390_vm_cpu_machine *mach;
1317         int ret = 0;
1318
1319         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1320         if (!mach) {
1321                 ret = -ENOMEM;
1322                 goto out;
1323         }
1324         get_cpu_id((struct cpuid *) &mach->cpuid);
1325         mach->ibc = sclp.ibc;
1326         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1327                S390_ARCH_FAC_LIST_SIZE_BYTE);
1328         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1329                sizeof(S390_lowcore.stfle_fac_list));
1330         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1331                  kvm->arch.model.ibc,
1332                  kvm->arch.model.cpuid);
1333         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1334                  mach->fac_mask[0],
1335                  mach->fac_mask[1],
1336                  mach->fac_mask[2]);
1337         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1338                  mach->fac_list[0],
1339                  mach->fac_list[1],
1340                  mach->fac_list[2]);
1341         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1342                 ret = -EFAULT;
1343         kfree(mach);
1344 out:
1345         return ret;
1346 }
1347
1348 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1349                                        struct kvm_device_attr *attr)
1350 {
1351         struct kvm_s390_vm_cpu_feat data;
1352
1353         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1354                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1355         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1356                 return -EFAULT;
1357         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1358                          data.feat[0],
1359                          data.feat[1],
1360                          data.feat[2]);
1361         return 0;
1362 }
1363
1364 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1365                                      struct kvm_device_attr *attr)
1366 {
1367         struct kvm_s390_vm_cpu_feat data;
1368
1369         bitmap_copy((unsigned long *) data.feat,
1370                     kvm_s390_available_cpu_feat,
1371                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1372         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1373                 return -EFAULT;
1374         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1375                          data.feat[0],
1376                          data.feat[1],
1377                          data.feat[2]);
1378         return 0;
1379 }
1380
1381 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1382                                           struct kvm_device_attr *attr)
1383 {
1384         /*
1385          * Once we can actually configure subfunctions (kernel + hw support),
1386          * we have to check if they were already set by user space, if so copy
1387          * them from kvm->arch.
1388          */
1389         return -ENXIO;
1390 }
1391
1392 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1393                                         struct kvm_device_attr *attr)
1394 {
1395         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1396             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1397                 return -EFAULT;
1398         return 0;
1399 }

1400 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1401 {
1402         int ret = -ENXIO;
1403
1404         switch (attr->attr) {
1405         case KVM_S390_VM_CPU_PROCESSOR:
1406                 ret = kvm_s390_get_processor(kvm, attr);
1407                 break;
1408         case KVM_S390_VM_CPU_MACHINE:
1409                 ret = kvm_s390_get_machine(kvm, attr);
1410                 break;
1411         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1412                 ret = kvm_s390_get_processor_feat(kvm, attr);
1413                 break;
1414         case KVM_S390_VM_CPU_MACHINE_FEAT:
1415                 ret = kvm_s390_get_machine_feat(kvm, attr);
1416                 break;
1417         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1418                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1419                 break;
1420         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1421                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1422                 break;
1423         }
1424         return ret;
1425 }
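
/*
 * Usage sketch (illustrative only, not part of this file): userspace
 * reaches the getters above through KVM_GET_DEVICE_ATTR on the VM fd,
 * handled by kvm_arch_vm_ioctl() further down. Assuming an open VM
 * file descriptor vm_fd, querying the host model could look roughly
 * like this:
 *
 *	struct kvm_s390_vm_cpu_machine mach;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *		.addr  = (__u64)(unsigned long)&mach,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) == 0) {
 *		... mach.cpuid, mach.ibc, mach.fac_mask and mach.fac_list
 *		... now hold the values filled in by kvm_s390_get_machine()
 *	}
 */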
1426
1427 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1428 {
1429         int ret;
1430
1431         switch (attr->group) {
1432         case KVM_S390_VM_MEM_CTRL:
1433                 ret = kvm_s390_set_mem_control(kvm, attr);
1434                 break;
1435         case KVM_S390_VM_TOD:
1436                 ret = kvm_s390_set_tod(kvm, attr);
1437                 break;
1438         case KVM_S390_VM_CPU_MODEL:
1439                 ret = kvm_s390_set_cpu_model(kvm, attr);
1440                 break;
1441         case KVM_S390_VM_CRYPTO:
1442                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1443                 break;
1444         case KVM_S390_VM_MIGRATION:
1445                 ret = kvm_s390_vm_set_migration(kvm, attr);
1446                 break;
1447         default:
1448                 ret = -ENXIO;
1449                 break;
1450         }
1451
1452         return ret;
1453 }
1454
1455 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1456 {
1457         int ret;
1458
1459         switch (attr->group) {
1460         case KVM_S390_VM_MEM_CTRL:
1461                 ret = kvm_s390_get_mem_control(kvm, attr);
1462                 break;
1463         case KVM_S390_VM_TOD:
1464                 ret = kvm_s390_get_tod(kvm, attr);
1465                 break;
1466         case KVM_S390_VM_CPU_MODEL:
1467                 ret = kvm_s390_get_cpu_model(kvm, attr);
1468                 break;
1469         case KVM_S390_VM_MIGRATION:
1470                 ret = kvm_s390_vm_get_migration(kvm, attr);
1471                 break;
1472         default:
1473                 ret = -ENXIO;
1474                 break;
1475         }
1476
1477         return ret;
1478 }
1479
1480 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1481 {
1482         int ret;
1483
1484         switch (attr->group) {
1485         case KVM_S390_VM_MEM_CTRL:
1486                 switch (attr->attr) {
1487                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1488                 case KVM_S390_VM_MEM_CLR_CMMA:
1489                         ret = sclp.has_cmma ? 0 : -ENXIO;
1490                         break;
1491                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1492                         ret = 0;
1493                         break;
1494                 default:
1495                         ret = -ENXIO;
1496                         break;
1497                 }
1498                 break;
1499         case KVM_S390_VM_TOD:
1500                 switch (attr->attr) {
1501                 case KVM_S390_VM_TOD_LOW:
1502                 case KVM_S390_VM_TOD_HIGH:
1503                         ret = 0;
1504                         break;
1505                 default:
1506                         ret = -ENXIO;
1507                         break;
1508                 }
1509                 break;
1510         case KVM_S390_VM_CPU_MODEL:
1511                 switch (attr->attr) {
1512                 case KVM_S390_VM_CPU_PROCESSOR:
1513                 case KVM_S390_VM_CPU_MACHINE:
1514                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1515                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1516                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1517                         ret = 0;
1518                         break;
1519                 /* configuring subfunctions is not supported yet */
1520                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1521                 default:
1522                         ret = -ENXIO;
1523                         break;
1524                 }
1525                 break;
1526         case KVM_S390_VM_CRYPTO:
1527                 switch (attr->attr) {
1528                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1529                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1530                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1531                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1532                         ret = 0;
1533                         break;
1534                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1535                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1536                         ret = ap_instructions_available() ? 0 : -ENXIO;
1537                         break;
1538                 default:
1539                         ret = -ENXIO;
1540                         break;
1541                 }
1542                 break;
1543         case KVM_S390_VM_MIGRATION:
1544                 ret = 0;
1545                 break;
1546         default:
1547                 ret = -ENXIO;
1548                 break;
1549         }
1550
1551         return ret;
1552 }
1553
1554 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1555 {
1556         uint8_t *keys;
1557         uint64_t hva;
1558         int srcu_idx, i, r = 0;
1559
1560         if (args->flags != 0)
1561                 return -EINVAL;
1562
1563         /* Is this guest using storage keys? */
1564         if (!mm_uses_skeys(current->mm))
1565                 return KVM_S390_GET_SKEYS_NONE;
1566
1567         /* Enforce sane limit on memory allocation */
1568         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1569                 return -EINVAL;
1570
1571         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1572         if (!keys)
1573                 return -ENOMEM;
1574
1575         down_read(&current->mm->mmap_sem);
1576         srcu_idx = srcu_read_lock(&kvm->srcu);
1577         for (i = 0; i < args->count; i++) {
1578                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1579                 if (kvm_is_error_hva(hva)) {
1580                         r = -EFAULT;
1581                         break;
1582                 }
1583
1584                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1585                 if (r)
1586                         break;
1587         }
1588         srcu_read_unlock(&kvm->srcu, srcu_idx);
1589         up_read(&current->mm->mmap_sem);
1590
1591         if (!r) {
1592                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1593                                  sizeof(uint8_t) * args->count);
1594                 if (r)
1595                         r = -EFAULT;
1596         }
1597
1598         kvfree(keys);
1599         return r;
1600 }
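
/*
 * Usage sketch (illustrative only, assuming an open VM fd vm_fd and a
 * page count n): userspace drives this via the KVM_S390_GET_SKEYS vm
 * ioctl, e.g.
 *
 *	uint8_t buf[n];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = n,
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * where rc == KVM_S390_GET_SKEYS_NONE signals that the guest does not
 * use storage keys and nothing was copied.
 */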
1601
1602 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1603 {
1604         uint8_t *keys;
1605         uint64_t hva;
1606         int srcu_idx, i, r = 0;
1607         bool unlocked;
1608
1609         if (args->flags != 0)
1610                 return -EINVAL;
1611
1612         /* Enforce sane limit on memory allocation */
1613         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1614                 return -EINVAL;
1615
1616         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1617         if (!keys)
1618                 return -ENOMEM;
1619
1620         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1621                            sizeof(uint8_t) * args->count);
1622         if (r) {
1623                 r = -EFAULT;
1624                 goto out;
1625         }
1626
1627         /* Enable storage key handling for the guest */
1628         r = s390_enable_skey();
1629         if (r)
1630                 goto out;
1631
1632         i = 0;
1633         down_read(&current->mm->mmap_sem);
1634         srcu_idx = srcu_read_lock(&kvm->srcu);
1635         while (i < args->count) {
1636                 unlocked = false;
1637                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1638                 if (kvm_is_error_hva(hva)) {
1639                         r = -EFAULT;
1640                         break;
1641                 }
1642
1643                 /* Lowest order bit is reserved */
1644                 if (keys[i] & 0x01) {
1645                         r = -EINVAL;
1646                         break;
1647                 }
1648
1649                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1650                 if (r) {
1651                         r = fixup_user_fault(current, current->mm, hva,
1652                                              FAULT_FLAG_WRITE, &unlocked);
1653                         if (r)
1654                                 break;
1655                 }
1656                 if (!r)
1657                         i++;
1658         }
1659         srcu_read_unlock(&kvm->srcu, srcu_idx);
1660         up_read(&current->mm->mmap_sem);
1661 out:
1662         kvfree(keys);
1663         return r;
1664 }
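
/*
 * For reference (z/Architecture storage key layout, as handled by the
 * two ioctls above): each key byte carries the access-control bits
 * ACC (bits 0-3), the fetch-protection bit F (bit 4), the reference
 * bit R (bit 5) and the change bit C (bit 6); the lowest-order bit
 * 0x01 is reserved, which is why kvm_s390_set_skeys() rejects any key
 * with that bit set.
 */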
1665
1666 /*
1667  * Base address and length must be sent at the start of each block, therefore
1668  * it's cheaper to send some clean data, as long as it's less than the size of
1669  * two longs.
1670  */
1671 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1672 /* for consistency */
1673 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1674
1675 /*
1676  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1677  * address falls in a hole. In that case the index of one of the memslots
1678  * bordering the hole is returned.
1679  */
1680 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1681 {
1682         int start = 0, end = slots->used_slots;
1683         int slot = atomic_read(&slots->lru_slot);
1684         struct kvm_memory_slot *memslots = slots->memslots;
1685
1686         if (gfn >= memslots[slot].base_gfn &&
1687             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1688                 return slot;
1689
1690         while (start < end) {
1691                 slot = start + (end - start) / 2;
1692
1693                 if (gfn >= memslots[slot].base_gfn)
1694                         end = slot;
1695                 else
1696                         start = slot + 1;
1697         }
1698
1699         if (gfn >= memslots[start].base_gfn &&
1700             gfn < memslots[start].base_gfn + memslots[start].npages) {
1701                 atomic_set(&slots->lru_slot, start);
1702         }
1703
1704         return start;
1705 }
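
/*
 * Note: like the generic search_memslots(), the binary search above
 * relies on the memslots array being sorted by base_gfn in descending
 * order, which is why "gfn >= base_gfn" moves the upper bound down.
 */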
1706
1707 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1708                               u8 *res, unsigned long bufsize)
1709 {
1710         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1711
1712         args->count = 0;
1713         while (args->count < bufsize) {
1714                 hva = gfn_to_hva(kvm, cur_gfn);
1715                 /*
1716                  * We return an error if the first value was invalid, but we
1717                  * return successfully if at least one value was copied.
1718                  */
1719                 if (kvm_is_error_hva(hva))
1720                         return args->count ? 0 : -EFAULT;
1721                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1722                         pgstev = 0;
1723                 res[args->count++] = (pgstev >> 24) & 0x43;
1724                 cur_gfn++;
1725         }
1726
1727         return 0;
1728 }
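
/*
 * The "(pgstev >> 24) & 0x43" extraction above (also used in
 * kvm_s390_get_cmma() below) keeps exactly the bits userspace may
 * see: after the shift, 0x03 covers the CMMA usage state
 * (_PGSTE_GPS_USAGE_MASK) and 0x40 the no-DAT bit (_PGSTE_GPS_NODAT);
 * all other PGSTE bits stay private to the host.
 */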
1729
1730 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1731                                               unsigned long cur_gfn)
1732 {
1733         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1734         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1735         unsigned long ofs = cur_gfn - ms->base_gfn;
1736
1737         if (ms->base_gfn + ms->npages <= cur_gfn) {
1738                 slotidx--;
1739                 /* If we are above the highest slot, wrap around */
1740                 if (slotidx < 0)
1741                         slotidx = slots->used_slots - 1;
1742
1743                 ms = slots->memslots + slotidx;
1744                 ofs = 0;
1745         }
1746         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1747         while ((slotidx > 0) && (ofs >= ms->npages)) {
1748                 slotidx--;
1749                 ms = slots->memslots + slotidx;
1750                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1751         }
1752         return ms->base_gfn + ofs;
1753 }
1754
1755 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1756                              u8 *res, unsigned long bufsize)
1757 {
1758         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1759         struct kvm_memslots *slots = kvm_memslots(kvm);
1760         struct kvm_memory_slot *ms;
1761
1762         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1763         ms = gfn_to_memslot(kvm, cur_gfn);
1764         args->count = 0;
1765         args->start_gfn = cur_gfn;
1766         if (!ms)
1767                 return 0;
1768         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1769         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1770
1771         while (args->count < bufsize) {
1772                 hva = gfn_to_hva(kvm, cur_gfn);
1773                 if (kvm_is_error_hva(hva))
1774                         return 0;
1775                 /* Decrement only if we actually flipped the bit to 0 */
1776                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1777                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
1778                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1779                         pgstev = 0;
1780                 /* Save the value */
1781                 res[args->count++] = (pgstev >> 24) & 0x43;
1782                 /* If the next bit is too far away, stop. */
1783                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1784                         return 0;
1785                 /* If we reached the previous "next", find the next one */
1786                 if (cur_gfn == next_gfn)
1787                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1788                 /* Reached the end of memory or of the buffer, stop */
1789                 if ((next_gfn >= mem_end) ||
1790                     (next_gfn - args->start_gfn >= bufsize))
1791                         return 0;
1792                 cur_gfn++;
1793                 /* Reached the end of the current memslot, take the next one. */
1794                 if (cur_gfn - ms->base_gfn >= ms->npages) {
1795                         ms = gfn_to_memslot(kvm, cur_gfn);
1796                         if (!ms)
1797                                 return 0;
1798                 }
1799         }
1800         return 0;
1801 }
1802
1803 /*
1804  * This function searches for the next page with dirty CMMA attributes, and
1805  * saves the attributes in the buffer up to either the end of the buffer or
1806  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1807  * no trailing clean bytes are saved.
1808  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1809  * output buffer will indicate 0 as length.
1810  */
1811 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1812                                   struct kvm_s390_cmma_log *args)
1813 {
1814         unsigned long bufsize;
1815         int srcu_idx, peek, ret;
1816         u8 *values;
1817
1818         if (!kvm->arch.use_cmma)
1819                 return -ENXIO;
1820         /* Invalid/unsupported flags were specified */
1821         if (args->flags & ~KVM_S390_CMMA_PEEK)
1822                 return -EINVAL;
1823         /* Migration mode query, and we are not doing a migration */
1824         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1825         if (!peek && !kvm->arch.migration_mode)
1826                 return -EINVAL;
1827         /* CMMA is disabled or was not used, or the buffer has length zero */
1828         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1829         if (!bufsize || !kvm->mm->context.uses_cmm) {
1830                 memset(args, 0, sizeof(*args));
1831                 return 0;
1832         }
1833         /* We are not peeking, and there are no dirty pages */
1834         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1835                 memset(args, 0, sizeof(*args));
1836                 return 0;
1837         }
1838
1839         values = vmalloc(bufsize);
1840         if (!values)
1841                 return -ENOMEM;
1842
1843         down_read(&kvm->mm->mmap_sem);
1844         srcu_idx = srcu_read_lock(&kvm->srcu);
1845         if (peek)
1846                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1847         else
1848                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1849         srcu_read_unlock(&kvm->srcu, srcu_idx);
1850         up_read(&kvm->mm->mmap_sem);
1851
1852         if (kvm->arch.migration_mode)
1853                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1854         else
1855                 args->remaining = 0;
1856
1857         if (copy_to_user((void __user *)args->values, values, args->count))
1858                 ret = -EFAULT;
1859
1860         vfree(values);
1861         return ret;
1862 }
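
/*
 * Usage sketch (illustrative only, assuming an open VM fd vm_fd): a
 * migration loop in userspace would repeatedly pull dirty CMMA values
 * with something like
 *
 *	uint8_t buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = gfn_to_continue_from,
 *		.count = sizeof(buf),
 *		.flags = 0,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * and then inspect the updated log.start_gfn, log.count and
 * log.remaining to decide whether another round is needed; setting
 * KVM_S390_CMMA_PEEK in flags reads the attributes without consuming
 * dirty bits. gfn_to_continue_from is a caller-side variable here.
 */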
1863
1864 /*
1865  * This function sets the CMMA attributes for the given pages. If the input
1866  * buffer has zero length, no action is taken, otherwise the attributes are
1867  * set and the mm->context.uses_cmm flag is set.
1868  */
1869 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1870                                   const struct kvm_s390_cmma_log *args)
1871 {
1872         unsigned long hva, mask, pgstev, i;
1873         uint8_t *bits;
1874         int srcu_idx, r = 0;
1875
1876         mask = args->mask;
1877
1878         if (!kvm->arch.use_cmma)
1879                 return -ENXIO;
1880         /* invalid/unsupported flags */
1881         if (args->flags != 0)
1882                 return -EINVAL;
1883         /* Enforce sane limit on memory allocation */
1884         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1885                 return -EINVAL;
1886         /* Nothing to do */
1887         if (args->count == 0)
1888                 return 0;
1889
1890         bits = vmalloc(array_size(sizeof(*bits), args->count));
1891         if (!bits)
1892                 return -ENOMEM;
1893
1894         r = copy_from_user(bits, (void __user *)args->values, args->count);
1895         if (r) {
1896                 r = -EFAULT;
1897                 goto out;
1898         }
1899
1900         down_read(&kvm->mm->mmap_sem);
1901         srcu_idx = srcu_read_lock(&kvm->srcu);
1902         for (i = 0; i < args->count; i++) {
1903                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1904                 if (kvm_is_error_hva(hva)) {
1905                         r = -EFAULT;
1906                         break;
1907                 }
1908
1909                 pgstev = bits[i];
1910                 pgstev = pgstev << 24;
1911                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1912                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1913         }
1914         srcu_read_unlock(&kvm->srcu, srcu_idx);
1915         up_read(&kvm->mm->mmap_sem);
1916
1917         if (!kvm->mm->context.uses_cmm) {
1918                 down_write(&kvm->mm->mmap_sem);
1919                 kvm->mm->context.uses_cmm = 1;
1920                 up_write(&kvm->mm->mmap_sem);
1921         }
1922 out:
1923         vfree(bits);
1924         return r;
1925 }
1926
1927 long kvm_arch_vm_ioctl(struct file *filp,
1928                        unsigned int ioctl, unsigned long arg)
1929 {
1930         struct kvm *kvm = filp->private_data;
1931         void __user *argp = (void __user *)arg;
1932         struct kvm_device_attr attr;
1933         int r;
1934
1935         switch (ioctl) {
1936         case KVM_S390_INTERRUPT: {
1937                 struct kvm_s390_interrupt s390int;
1938
1939                 r = -EFAULT;
1940                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1941                         break;
1942                 r = kvm_s390_inject_vm(kvm, &s390int);
1943                 break;
1944         }
1945         case KVM_CREATE_IRQCHIP: {
1946                 struct kvm_irq_routing_entry routing;
1947
1948                 r = -EINVAL;
1949                 if (kvm->arch.use_irqchip) {
1950                         /* Set up dummy routing. */
1951                         memset(&routing, 0, sizeof(routing));
1952                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1953                 }
1954                 break;
1955         }
1956         case KVM_SET_DEVICE_ATTR: {
1957                 r = -EFAULT;
1958                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1959                         break;
1960                 r = kvm_s390_vm_set_attr(kvm, &attr);
1961                 break;
1962         }
1963         case KVM_GET_DEVICE_ATTR: {
1964                 r = -EFAULT;
1965                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1966                         break;
1967                 r = kvm_s390_vm_get_attr(kvm, &attr);
1968                 break;
1969         }
1970         case KVM_HAS_DEVICE_ATTR: {
1971                 r = -EFAULT;
1972                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1973                         break;
1974                 r = kvm_s390_vm_has_attr(kvm, &attr);
1975                 break;
1976         }
1977         case KVM_S390_GET_SKEYS: {
1978                 struct kvm_s390_skeys args;
1979
1980                 r = -EFAULT;
1981                 if (copy_from_user(&args, argp,
1982                                    sizeof(struct kvm_s390_skeys)))
1983                         break;
1984                 r = kvm_s390_get_skeys(kvm, &args);
1985                 break;
1986         }
1987         case KVM_S390_SET_SKEYS: {
1988                 struct kvm_s390_skeys args;
1989
1990                 r = -EFAULT;
1991                 if (copy_from_user(&args, argp,
1992                                    sizeof(struct kvm_s390_skeys)))
1993                         break;
1994                 r = kvm_s390_set_skeys(kvm, &args);
1995                 break;
1996         }
1997         case KVM_S390_GET_CMMA_BITS: {
1998                 struct kvm_s390_cmma_log args;
1999
2000                 r = -EFAULT;
2001                 if (copy_from_user(&args, argp, sizeof(args)))
2002                         break;
2003                 mutex_lock(&kvm->slots_lock);
2004                 r = kvm_s390_get_cmma_bits(kvm, &args);
2005                 mutex_unlock(&kvm->slots_lock);
2006                 if (!r) {
2007                         r = copy_to_user(argp, &args, sizeof(args));
2008                         if (r)
2009                                 r = -EFAULT;
2010                 }
2011                 break;
2012         }
2013         case KVM_S390_SET_CMMA_BITS: {
2014                 struct kvm_s390_cmma_log args;
2015
2016                 r = -EFAULT;
2017                 if (copy_from_user(&args, argp, sizeof(args)))
2018                         break;
2019                 mutex_lock(&kvm->slots_lock);
2020                 r = kvm_s390_set_cmma_bits(kvm, &args);
2021                 mutex_unlock(&kvm->slots_lock);
2022                 break;
2023         }
2024         default:
2025                 r = -ENOTTY;
2026         }
2027
2028         return r;
2029 }
2030
2031 static int kvm_s390_apxa_installed(void)
2032 {
2033         struct ap_config_info info;
2034
2035         if (ap_instructions_available()) {
2036                 if (ap_qci(&info) == 0)
2037                         return info.apxa;
2038         }
2039
2040         return 0;
2041 }
2042
2043 /*
2044  * The format of the crypto control block (CRYCB) is specified in the 3 low
2045  * order bits of the CRYCB designation (CRYCBD) field as follows:
2046  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2047  *           AP extended addressing (APXA) facility is installed.
2048  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2049  * Format 2: Both the APXA and MSAX3 facilities are installed.
2050  */
2051 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2052 {
2053         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2054
2055         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2056         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2057
2058         /* Check whether MSAX3 is installed */
2059         if (!test_kvm_facility(kvm, 76))
2060                 return;
2061
2062         if (kvm_s390_apxa_installed())
2063                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2064         else
2065                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2066 }
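
/*
 * Descriptive note: crycbd combines the CRYCB origin (high-order
 * bits) with the format (low-order bits), so after this function
 * "crycbd & CRYCB_FORMAT_MASK" directly encodes which of the three
 * layouts described above the guest will see.
 */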
2067
2068 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2069                                unsigned long *aqm, unsigned long *adm)
2070 {
2071         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2072
2073         mutex_lock(&kvm->lock);
2074         kvm_s390_vcpu_block_all(kvm);
2075
2076         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2077         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2078                 memcpy(crycb->apcb1.apm, apm, 32);
2079                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2080                          apm[0], apm[1], apm[2], apm[3]);
2081                 memcpy(crycb->apcb1.aqm, aqm, 32);
2082                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2083                          aqm[0], aqm[1], aqm[2], aqm[3]);
2084                 memcpy(crycb->apcb1.adm, adm, 32);
2085                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2086                          adm[0], adm[1], adm[2], adm[3]);
2087                 break;
2088         case CRYCB_FORMAT1:
2089         case CRYCB_FORMAT0: /* fall through - both use APCB0 */
2090                 memcpy(crycb->apcb0.apm, apm, 8);
2091                 memcpy(crycb->apcb0.aqm, aqm, 2);
2092                 memcpy(crycb->apcb0.adm, adm, 2);
2093                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2094                          apm[0], *((unsigned short *)aqm),
2095                          *((unsigned short *)adm));
2096                 break;
2097         default:        /* cannot happen */
2098                 break;
2099         }
2100
2101         /* recreate the shadow crycb for each vcpu */
2102         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2103         kvm_s390_vcpu_unblock_all(kvm);
2104         mutex_unlock(&kvm->lock);
2105 }
2106 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2107
2108 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2109 {
2110         mutex_lock(&kvm->lock);
2111         kvm_s390_vcpu_block_all(kvm);
2112
2113         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2114                sizeof(kvm->arch.crypto.crycb->apcb0));
2115         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2116                sizeof(kvm->arch.crypto.crycb->apcb1));
2117
2118         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2119         /* recreate the shadow crycb for each vcpu */
2120         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2121         kvm_s390_vcpu_unblock_all(kvm);
2122         mutex_unlock(&kvm->lock);
2123 }
2124 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2125
2126 static u64 kvm_s390_get_initial_cpuid(void)
2127 {
2128         struct cpuid cpuid;
2129
2130         get_cpu_id(&cpuid);
2131         cpuid.version = 0xff;
2132         return *((u64 *) &cpuid);
2133 }
2134
2135 static void kvm_s390_crypto_init(struct kvm *kvm)
2136 {
2137         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2138         kvm_s390_set_crycb_format(kvm);
2139
2140         if (!test_kvm_facility(kvm, 76))
2141                 return;
2142
2143         /* Enable AES/DEA protected key functions by default */
2144         kvm->arch.crypto.aes_kw = 1;
2145         kvm->arch.crypto.dea_kw = 1;
2146         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2147                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2148         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2149                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2150 }
2151
2152 static void sca_dispose(struct kvm *kvm)
2153 {
2154         if (kvm->arch.use_esca)
2155                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2156         else
2157                 free_page((unsigned long)(kvm->arch.sca));
2158         kvm->arch.sca = NULL;
2159 }
2160
2161 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2162 {
2163         gfp_t alloc_flags = GFP_KERNEL;
2164         int i, rc;
2165         char debug_name[16];
2166         static unsigned long sca_offset;
2167
2168         rc = -EINVAL;
2169 #ifdef CONFIG_KVM_S390_UCONTROL
2170         if (type & ~KVM_VM_S390_UCONTROL)
2171                 goto out_err;
2172         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2173                 goto out_err;
2174 #else
2175         if (type)
2176                 goto out_err;
2177 #endif
2178
2179         rc = s390_enable_sie();
2180         if (rc)
2181                 goto out_err;
2182
2183         rc = -ENOMEM;
2184
2185         if (!sclp.has_64bscao)
2186                 alloc_flags |= GFP_DMA;
2187         rwlock_init(&kvm->arch.sca_lock);
2188         /* start with basic SCA */
2189         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2190         if (!kvm->arch.sca)
2191                 goto out_err;
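        /*
         * Descriptive note: sca_offset staggers consecutively created
         * SCAs within their page in 16-byte steps, a cache-coloring
         * measure so the SCAs of different VMs do not all start at the
         * same page offset; kvm_lock serializes updates of the static
         * offset.
         */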
2192         spin_lock(&kvm_lock);
2193         sca_offset += 16;
2194         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2195                 sca_offset = 0;
2196         kvm->arch.sca = (struct bsca_block *)
2197                         ((char *) kvm->arch.sca + sca_offset);
2198         spin_unlock(&kvm_lock);
2199
2200         sprintf(debug_name, "kvm-%u", current->pid);
2201
2202         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2203         if (!kvm->arch.dbf)
2204                 goto out_err;
2205
2206         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2207         kvm->arch.sie_page2 =
2208              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2209         if (!kvm->arch.sie_page2)
2210                 goto out_err;
2211
2212         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2213
2214         for (i = 0; i < kvm_s390_fac_size(); i++) {
2215                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2216                                               (kvm_s390_fac_base[i] |
2217                                                kvm_s390_fac_ext[i]);
2218                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2219                                               kvm_s390_fac_base[i];
2220         }
2221
2222         /* we are always in czam mode - even on pre-z14 machines */
2223         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2224         set_kvm_facility(kvm->arch.model.fac_list, 138);
2225         /* we emulate STHYI in kvm */
2226         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2227         set_kvm_facility(kvm->arch.model.fac_list, 74);
2228         if (MACHINE_HAS_TLB_GUEST) {
2229                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2230                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2231         }
2232
2233         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2234         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2235
2236         kvm_s390_crypto_init(kvm);
2237
2238         mutex_init(&kvm->arch.float_int.ais_lock);
2239         spin_lock_init(&kvm->arch.float_int.lock);
2240         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2241                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2242         init_waitqueue_head(&kvm->arch.ipte_wq);
2243         mutex_init(&kvm->arch.ipte_mutex);
2244
2245         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2246         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2247
2248         if (type & KVM_VM_S390_UCONTROL) {
2249                 kvm->arch.gmap = NULL;
2250                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2251         } else {
2252                 if (sclp.hamax == U64_MAX)
2253                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2254                 else
2255                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2256                                                     sclp.hamax + 1);
2257                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2258                 if (!kvm->arch.gmap)
2259                         goto out_err;
2260                 kvm->arch.gmap->private = kvm;
2261                 kvm->arch.gmap->pfault_enabled = 0;
2262         }
2263
2264         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2265         kvm->arch.use_skf = sclp.has_skey;
2266         spin_lock_init(&kvm->arch.start_stop_lock);
2267         kvm_s390_vsie_init(kvm);
2268         kvm_s390_gisa_init(kvm);
2269         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2270
2271         return 0;
2272 out_err:
2273         free_page((unsigned long)kvm->arch.sie_page2);
2274         debug_unregister(kvm->arch.dbf);
2275         sca_dispose(kvm);
2276         KVM_EVENT(3, "creation of vm failed: %d", rc);
2277         return rc;
2278 }
2279
2280 bool kvm_arch_has_vcpu_debugfs(void)
2281 {
2282         return false;
2283 }
2284
2285 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2286 {
2287         return 0;
2288 }
2289
2290 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2291 {
2292         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2293         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2294         kvm_s390_clear_local_irqs(vcpu);
2295         kvm_clear_async_pf_completion_queue(vcpu);
2296         if (!kvm_is_ucontrol(vcpu->kvm))
2297                 sca_del_vcpu(vcpu);
2298
2299         if (kvm_is_ucontrol(vcpu->kvm))
2300                 gmap_remove(vcpu->arch.gmap);
2301
2302         if (vcpu->kvm->arch.use_cmma)
2303                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2304         free_page((unsigned long)(vcpu->arch.sie_block));
2305
2306         kvm_vcpu_uninit(vcpu);
2307         kmem_cache_free(kvm_vcpu_cache, vcpu);
2308 }
2309
2310 static void kvm_free_vcpus(struct kvm *kvm)
2311 {
2312         unsigned int i;
2313         struct kvm_vcpu *vcpu;
2314
2315         kvm_for_each_vcpu(i, vcpu, kvm)
2316                 kvm_arch_vcpu_destroy(vcpu);
2317
2318         mutex_lock(&kvm->lock);
2319         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2320                 kvm->vcpus[i] = NULL;
2321
2322         atomic_set(&kvm->online_vcpus, 0);
2323         mutex_unlock(&kvm->lock);
2324 }
2325
2326 void kvm_arch_destroy_vm(struct kvm *kvm)
2327 {
2328         kvm_free_vcpus(kvm);
2329         sca_dispose(kvm);
2330         debug_unregister(kvm->arch.dbf);
2331         kvm_s390_gisa_destroy(kvm);
2332         free_page((unsigned long)kvm->arch.sie_page2);
2333         if (!kvm_is_ucontrol(kvm))
2334                 gmap_remove(kvm->arch.gmap);
2335         kvm_s390_destroy_adapters(kvm);
2336         kvm_s390_clear_float_irqs(kvm);
2337         kvm_s390_vsie_destroy(kvm);
2338         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2339 }
2340
2341 /* Section: vcpu related */
2342 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2343 {
2344         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2345         if (!vcpu->arch.gmap)
2346                 return -ENOMEM;
2347         vcpu->arch.gmap->private = vcpu->kvm;
2348
2349         return 0;
2350 }
2351
2352 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2353 {
2354         if (!kvm_s390_use_sca_entries())
2355                 return;
2356         read_lock(&vcpu->kvm->arch.sca_lock);
2357         if (vcpu->kvm->arch.use_esca) {
2358                 struct esca_block *sca = vcpu->kvm->arch.sca;
2359
2360                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2361                 sca->cpu[vcpu->vcpu_id].sda = 0;
2362         } else {
2363                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2364
2365                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2366                 sca->cpu[vcpu->vcpu_id].sda = 0;
2367         }
2368         read_unlock(&vcpu->kvm->arch.sca_lock);
2369 }
2370
2371 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2372 {
2373         if (!kvm_s390_use_sca_entries()) {
2374                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2375
2376                 /* we still need the basic sca for the ipte control */
2377                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2378                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2379                 return;
2380         }
2381         read_lock(&vcpu->kvm->arch.sca_lock);
2382         if (vcpu->kvm->arch.use_esca) {
2383                 struct esca_block *sca = vcpu->kvm->arch.sca;
2384
2385                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2386                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2387                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2388                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2389                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2390         } else {
2391                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2392
2393                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2394                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2395                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2396                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2397         }
2398         read_unlock(&vcpu->kvm->arch.sca_lock);
2399 }
2400
2401 /* Basic SCA to Extended SCA data copy routines */
2402 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2403 {
2404         d->sda = s->sda;
2405         d->sigp_ctrl.c = s->sigp_ctrl.c;
2406         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2407 }
2408
2409 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2410 {
2411         int i;
2412
2413         d->ipte_control = s->ipte_control;
2414         d->mcn[0] = s->mcn;
2415         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2416                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2417 }
2418
2419 static int sca_switch_to_extended(struct kvm *kvm)
2420 {
2421         struct bsca_block *old_sca = kvm->arch.sca;
2422         struct esca_block *new_sca;
2423         struct kvm_vcpu *vcpu;
2424         unsigned int vcpu_idx;
2425         u32 scaol, scaoh;
2426
2427         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2428         if (!new_sca)
2429                 return -ENOMEM;
2430
2431         scaoh = (u32)((u64)(new_sca) >> 32);
2432         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2433
2434         kvm_s390_vcpu_block_all(kvm);
2435         write_lock(&kvm->arch.sca_lock);
2436
2437         sca_copy_b_to_e(new_sca, old_sca);
2438
2439         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2440                 vcpu->arch.sie_block->scaoh = scaoh;
2441                 vcpu->arch.sie_block->scaol = scaol;
2442                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2443         }
2444         kvm->arch.sca = new_sca;
2445         kvm->arch.use_esca = 1;
2446
2447         write_unlock(&kvm->arch.sca_lock);
2448         kvm_s390_vcpu_unblock_all(kvm);
2449
2450         free_page((unsigned long)old_sca);
2451
2452         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2453                  old_sca, kvm->arch.sca);
2454         return 0;
2455 }
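
/*
 * Descriptive note: the basic SCA only has room for
 * KVM_S390_BSCA_CPU_SLOTS (64) entries, while the extended SCA raises
 * this to KVM_S390_ESCA_CPU_SLOTS (248); sca_can_add_vcpu() below
 * triggers the switch once a vcpu id no longer fits the basic format.
 */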
2456
2457 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2458 {
2459         int rc;
2460
2461         if (!kvm_s390_use_sca_entries()) {
2462                 if (id < KVM_MAX_VCPUS)
2463                         return true;
2464                 return false;
2465         }
2466         if (id < KVM_S390_BSCA_CPU_SLOTS)
2467                 return true;
2468         if (!sclp.has_esca || !sclp.has_64bscao)
2469                 return false;
2470
2471         mutex_lock(&kvm->lock);
2472         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2473         mutex_unlock(&kvm->lock);
2474
2475         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2476 }
2477
2478 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2479 {
2480         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2481         kvm_clear_async_pf_completion_queue(vcpu);
2482         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2483                                     KVM_SYNC_GPRS |
2484                                     KVM_SYNC_ACRS |
2485                                     KVM_SYNC_CRS |
2486                                     KVM_SYNC_ARCH0 |
2487                                     KVM_SYNC_PFAULT;
2488         kvm_s390_set_prefix(vcpu, 0);
2489         if (test_kvm_facility(vcpu->kvm, 64))
2490                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2491         if (test_kvm_facility(vcpu->kvm, 82))
2492                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2493         if (test_kvm_facility(vcpu->kvm, 133))
2494                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2495         if (test_kvm_facility(vcpu->kvm, 156))
2496                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2497         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2498          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2499          */
2500         if (MACHINE_HAS_VX)
2501                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2502         else
2503                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2504
2505         if (kvm_is_ucontrol(vcpu->kvm))
2506                 return __kvm_ucontrol_vcpu_init(vcpu);
2507
2508         return 0;
2509 }
2510
2511 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2512 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2513 {
2514         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2515         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2516         vcpu->arch.cputm_start = get_tod_clock_fast();
2517         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2518 }
2519
2520 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2521 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2522 {
2523         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2524         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2525         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2526         vcpu->arch.cputm_start = 0;
2527         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2528 }
2529
2530 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2531 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2532 {
2533         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2534         vcpu->arch.cputm_enabled = true;
2535         __start_cpu_timer_accounting(vcpu);
2536 }
2537
2538 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2539 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2540 {
2541         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2542         __stop_cpu_timer_accounting(vcpu);
2543         vcpu->arch.cputm_enabled = false;
2544 }
2545
2546 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2547 {
2548         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2549         __enable_cpu_timer_accounting(vcpu);
2550         preempt_enable();
2551 }
2552
2553 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2554 {
2555         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2556         __disable_cpu_timer_accounting(vcpu);
2557         preempt_enable();
2558 }
2559
2560 /* set the cpu timer - may only be called from the VCPU thread itself */
2561 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2562 {
2563         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2564         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2565         if (vcpu->arch.cputm_enabled)
2566                 vcpu->arch.cputm_start = get_tod_clock_fast();
2567         vcpu->arch.sie_block->cputm = cputm;
2568         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2569         preempt_enable();
2570 }
2571
2572 /* update and get the cpu timer - can also be called from other VCPU threads */
2573 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2574 {
2575         unsigned int seq;
2576         __u64 value;
2577
2578         if (unlikely(!vcpu->arch.cputm_enabled))
2579                 return vcpu->arch.sie_block->cputm;
2580
2581         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2582         do {
2583                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2584                 /*
2585                  * If the writer would ever execute a read in the critical
2586                  * section, e.g. in irq context, we have a deadlock.
2587                  */
2588                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2589                 value = vcpu->arch.sie_block->cputm;
2590                 /* if cputm_start is 0, accounting is being started/stopped */
2591                 if (likely(vcpu->arch.cputm_start))
2592                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
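                /*
                 * Retrying with "seq & ~1" makes the loop spin whenever
                 * a writer was active: an odd seq can never match the
                 * final (even) sequence count, so the value is re-read
                 * until the concurrent update has completed.
                 */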
2593         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2594         preempt_enable();
2595         return value;
2596 }
2597
2598 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2599 {
2601         gmap_enable(vcpu->arch.enabled_gmap);
2602         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2603         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2604                 __start_cpu_timer_accounting(vcpu);
2605         vcpu->cpu = cpu;
2606 }
2607
2608 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2609 {
2610         vcpu->cpu = -1;
2611         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2612                 __stop_cpu_timer_accounting(vcpu);
2613         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2614         vcpu->arch.enabled_gmap = gmap_get_enabled();
2615         gmap_disable(vcpu->arch.enabled_gmap);
2617 }
2618
2619 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2620 {
2621         /* this equals the initial cpu reset in the PoP, but we don't switch to ESA */
2622         vcpu->arch.sie_block->gpsw.mask = 0UL;
2623         vcpu->arch.sie_block->gpsw.addr = 0UL;
2624         kvm_s390_set_prefix(vcpu, 0);
2625         kvm_s390_set_cpu_timer(vcpu, 0);
2626         vcpu->arch.sie_block->ckc       = 0UL;
2627         vcpu->arch.sie_block->todpr     = 0;
2628         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2629         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2630                                         CR0_INTERRUPT_KEY_SUBMASK |
2631                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2632         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2633                                         CR14_UNUSED_33 |
2634                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2635         /* make sure the new fpc will be lazily loaded */
2636         save_fpu_regs();
2637         current->thread.fpu.fpc = 0;
2638         vcpu->arch.sie_block->gbea = 1;
2639         vcpu->arch.sie_block->pp = 0;
2640         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2641         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2642         kvm_clear_async_pf_completion_queue(vcpu);
2643         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2644                 kvm_s390_vcpu_stop(vcpu);
2645         kvm_s390_clear_local_irqs(vcpu);
2646 }
2647
2648 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2649 {
2650         mutex_lock(&vcpu->kvm->lock);
2651         preempt_disable();
2652         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2653         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2654         preempt_enable();
2655         mutex_unlock(&vcpu->kvm->lock);
2656         if (!kvm_is_ucontrol(vcpu->kvm)) {
2657                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2658                 sca_add_vcpu(vcpu);
2659         }
2660         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2661                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2662         /* make vcpu_load load the right gmap on the first trigger */
2663         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2664 }
2665
2666 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2667 {
2668         /*
2669          * If the AP instructions are not being interpreted and the MSAX3
2670          * facility is not configured for the guest, there is nothing to set up.
2671          */
2672         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2673                 return;
2674
2675         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2676         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2677         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2678
2679         if (vcpu->kvm->arch.crypto.apie)
2680                 vcpu->arch.sie_block->eca |= ECA_APIE;
2681
2682         /* Set up protected key support */
2683         if (vcpu->kvm->arch.crypto.aes_kw)
2684                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2685         if (vcpu->kvm->arch.crypto.dea_kw)
2686                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2687 }
2688
2689 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2690 {
2691         free_page(vcpu->arch.sie_block->cbrlo);
2692         vcpu->arch.sie_block->cbrlo = 0;
2693 }
2694
2695 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2696 {
2697         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2698         if (!vcpu->arch.sie_block->cbrlo)
2699                 return -ENOMEM;
2700         return 0;
2701 }
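
/*
 * Descriptive note: cbrlo appears to serve as the collection-buffer
 * origin for interpreted ESSA: the hardware logs the guest pages
 * whose storage attributes were changed into this zeroed page, so
 * that KVM can post-process them (see the ESSA handling in priv.c).
 */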
2702
2703 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2704 {
2705         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2706
2707         vcpu->arch.sie_block->ibc = model->ibc;
2708         if (test_kvm_facility(vcpu->kvm, 7))
2709                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2710 }
2711
2712 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2713 {
2714         int rc = 0;
2715
2716         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2717                                                     CPUSTAT_SM |
2718                                                     CPUSTAT_STOPPED);
2719
2720         if (test_kvm_facility(vcpu->kvm, 78))
2721                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2722         else if (test_kvm_facility(vcpu->kvm, 8))
2723                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2724
2725         kvm_s390_vcpu_setup_model(vcpu);
2726
2727         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2728         if (MACHINE_HAS_ESOP)
2729                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2730         if (test_kvm_facility(vcpu->kvm, 9))
2731                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2732         if (test_kvm_facility(vcpu->kvm, 73))
2733                 vcpu->arch.sie_block->ecb |= ECB_TE;
2734
2735         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2736                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2737         if (test_kvm_facility(vcpu->kvm, 130))
2738                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2739         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2740         if (sclp.has_cei)
2741                 vcpu->arch.sie_block->eca |= ECA_CEI;
2742         if (sclp.has_ib)
2743                 vcpu->arch.sie_block->eca |= ECA_IB;
2744         if (sclp.has_siif)
2745                 vcpu->arch.sie_block->eca |= ECA_SII;
2746         if (sclp.has_sigpif)
2747                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2748         if (test_kvm_facility(vcpu->kvm, 129)) {
2749                 vcpu->arch.sie_block->eca |= ECA_VX;
2750                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2751         }
2752         if (test_kvm_facility(vcpu->kvm, 139))
2753                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2754         if (test_kvm_facility(vcpu->kvm, 156))
2755                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2756         if (vcpu->arch.sie_block->gd) {
2757                 vcpu->arch.sie_block->eca |= ECA_AIV;
2758                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2759                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2760         }
2761         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2762                                         | SDNXC;
2763         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2764
2765         if (sclp.has_kss)
2766                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2767         else
2768                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2769
2770         if (vcpu->kvm->arch.use_cmma) {
2771                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2772                 if (rc)
2773                         return rc;
2774         }
2775         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2776         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2777
2778         vcpu->arch.sie_block->hpid = HPID_KVM;
2779
2780         kvm_s390_vcpu_crypto_setup(vcpu);
2781
2782         return rc;
2783 }
2784
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                                      unsigned int id)
{
        struct kvm_vcpu *vcpu;
        struct sie_page *sie_page;
        int rc = -EINVAL;

        if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
                goto out;

        rc = -ENOMEM;

        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
        if (!vcpu)
                goto out;

        BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
        if (!sie_page)
                goto out_free_cpu;

        vcpu->arch.sie_block = &sie_page->sie_block;
        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

        /* the real guest size will always be smaller than msl */
        vcpu->arch.sie_block->mso = 0;
        vcpu->arch.sie_block->msl = sclp.hamax;

        vcpu->arch.sie_block->icpua = id;
        spin_lock_init(&vcpu->arch.local_int.lock);
        vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
        if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
                vcpu->arch.sie_block->gd |= GISA_FORMAT1;
        seqcount_init(&vcpu->arch.cputm_seqcount);

        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
                goto out_free_sie_block;
        VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
                 vcpu->arch.sie_block);
        trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

        return vcpu;
out_free_sie_block:
        free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
        kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
        return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
        return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}

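/*
 * The PROG_BLOCK_SIE and PROG_REQUEST bits in prog20 keep a vcpu from
 * (re-)entering SIE: blocking kicks the vcpu out of guest context and
 * keeps it out until it is unblocked again, while requesting only keeps
 * it out until the pending requests have been handled.
 */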
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
        atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
        exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
        atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
        atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
        exit_sie(vcpu);
}

bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
{
        return atomic_read(&vcpu->arch.sie_block->prog20) &
               (PROG_BLOCK_SIE | PROG_REQUEST);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
        atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle), the function returns
 * immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
        kvm_s390_vsie_kick(vcpu);
        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
                cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
        kvm_make_request(req, vcpu);
        kvm_s390_vcpu_request(vcpu);
}

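/*
 * gmap invalidation notifier, called when host mappings backing guest
 * memory change. Only the two pages of a vcpu's prefix area matter here:
 * if those are hit, an MMU reload is requested so that the ipte notifier
 * for the prefix pages is re-armed before the vcpu re-enters SIE.
 */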
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end)
{
        struct kvm *kvm = gmap->private;
        struct kvm_vcpu *vcpu;
        unsigned long prefix;
        int i;

        if (gmap_is_shadow(gmap))
                return;
        if (start >= 1UL << 31)
                /* We are only interested in prefix pages */
                return;
        kvm_for_each_vcpu(i, vcpu, kvm) {
                /* match against both prefix pages */
                prefix = kvm_s390_get_prefix(vcpu);
                if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1) {
                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
                                   start, end);
                        kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
                }
        }
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        /* kvm common code refers to this, but never calls it */
        BUG();
        return 0;
}

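/*
 * Read a single vcpu register, selected by reg->id, and copy it to the
 * user space address in reg->addr (KVM_GET_ONE_REG).
 */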
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
                                           struct kvm_one_reg *reg)
{
        int r = -EINVAL;

        switch (reg->id) {
        case KVM_REG_S390_TODPR:
                r = put_user(vcpu->arch.sie_block->todpr,
                             (u32 __user *)reg->addr);
                break;
        case KVM_REG_S390_EPOCHDIFF:
                r = put_user(vcpu->arch.sie_block->epoch,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CPU_TIMER:
                r = put_user(kvm_s390_get_cpu_timer(vcpu),
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CLOCK_COMP:
                r = put_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFTOKEN:
                r = put_user(vcpu->arch.pfault_token,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFCOMPARE:
                r = put_user(vcpu->arch.pfault_compare,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFSELECT:
                r = put_user(vcpu->arch.pfault_select,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PP:
                r = put_user(vcpu->arch.sie_block->pp,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_GBEA:
                r = put_user(vcpu->arch.sie_block->gbea,
                             (u64 __user *)reg->addr);
                break;
        default:
                break;
        }

        return r;
}

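/*
 * Write a single vcpu register, selected by reg->id, with the value read
 * from the user space address in reg->addr (KVM_SET_ONE_REG).
 */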
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
                                           struct kvm_one_reg *reg)
{
        int r = -EINVAL;
        __u64 val;

        switch (reg->id) {
        case KVM_REG_S390_TODPR:
                r = get_user(vcpu->arch.sie_block->todpr,
                             (u32 __user *)reg->addr);
                break;
        case KVM_REG_S390_EPOCHDIFF:
                r = get_user(vcpu->arch.sie_block->epoch,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CPU_TIMER:
                r = get_user(val, (u64 __user *)reg->addr);
                if (!r)
                        kvm_s390_set_cpu_timer(vcpu, val);
                break;
        case KVM_REG_S390_CLOCK_COMP:
                r = get_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFTOKEN:
                r = get_user(vcpu->arch.pfault_token,
                             (u64 __user *)reg->addr);
                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
                        kvm_clear_async_pf_completion_queue(vcpu);
                break;
        case KVM_REG_S390_PFCOMPARE:
                r = get_user(vcpu->arch.pfault_compare,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFSELECT:
                r = get_user(vcpu->arch.pfault_select,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PP:
                r = get_user(vcpu->arch.sie_block->pp,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_GBEA:
                r = get_user(vcpu->arch.sie_block->gbea,
                             (u64 __user *)reg->addr);
                break;
        default:
                break;
        }

        return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        vcpu_load(vcpu);
        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
        vcpu_put(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        vcpu_load(vcpu);
        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
        vcpu_put(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        vcpu_load(vcpu);

        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

        vcpu_put(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        vcpu_load(vcpu);

        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

        vcpu_put(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        int ret = 0;

        vcpu_load(vcpu);

        if (test_fp_ctl(fpu->fpc)) {
                ret = -EINVAL;
                goto out;
        }
        vcpu->run->s.regs.fpc = fpu->fpc;
        if (MACHINE_HAS_VX)
                convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
                                 (freg_t *) fpu->fprs);
        else
                memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
        vcpu_put(vcpu);
        return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        vcpu_load(vcpu);

        /* make sure we have the latest values */
        save_fpu_regs();
        if (MACHINE_HAS_VX)
                convert_vx_to_fp((freg_t *) fpu->fprs,
                                 (__vector128 *) vcpu->run->s.regs.vrs);
        else
                memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
        fpu->fpc = vcpu->run->s.regs.fpc;

        vcpu_put(vcpu);
        return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
        int rc = 0;

        if (!is_vcpu_stopped(vcpu)) {
                rc = -EBUSY;
        } else {
                vcpu->run->psw_mask = psw.mask;
                vcpu->run->psw_addr = psw.addr;
        }
        return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                  struct kvm_translation *tr)
{
        return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
                              KVM_GUESTDBG_USE_HW_BP | \
                              KVM_GUESTDBG_ENABLE)

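/*
 * Enable, update or disable debugging (single-stepping and hardware
 * breakpoints) for a guest vcpu. This requires the guest-PER facility;
 * without it the request is rejected with -EINVAL.
 */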
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg)
{
        int rc = 0;

        vcpu_load(vcpu);

        vcpu->guest_debug = 0;
        kvm_s390_clear_bp_data(vcpu);

        if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
                rc = -EINVAL;
                goto out;
        }
        if (!sclp.has_gpere) {
                rc = -EINVAL;
                goto out;
        }

        if (dbg->control & KVM_GUESTDBG_ENABLE) {
                vcpu->guest_debug = dbg->control;
                /* enforce guest PER */
                kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

                if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
                        rc = kvm_s390_import_bp_data(vcpu, dbg);
        } else {
                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
                vcpu->arch.guestdbg.last_bp = 0;
        }

        if (rc) {
                vcpu->guest_debug = 0;
                kvm_s390_clear_bp_data(vcpu);
                kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
        }

out:
        vcpu_put(vcpu);
        return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
{
        int ret;

        vcpu_load(vcpu);

        /* CHECK_STOP and LOAD are not supported yet */
        ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
                                      KVM_MP_STATE_OPERATING;

        vcpu_put(vcpu);
        return ret;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
{
        int rc = 0;

        vcpu_load(vcpu);

        /* user space knows about this interface - let it control the state */
        vcpu->kvm->arch.user_cpu_state_ctrl = 1;

        switch (mp_state->mp_state) {
        case KVM_MP_STATE_STOPPED:
                kvm_s390_vcpu_stop(vcpu);
                break;
        case KVM_MP_STATE_OPERATING:
                kvm_s390_vcpu_start(vcpu);
                break;
        case KVM_MP_STATE_LOAD:
        case KVM_MP_STATE_CHECK_STOP:
                /* fall through - CHECK_STOP and LOAD are not supported yet */
        default:
                rc = -ENXIO;
        }

        vcpu_put(vcpu);
        return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
        return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}

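/*
 * Process all requests pending for this vcpu before (re-)entering SIE.
 * Returns 0 if the vcpu may enter guest context, a negative error code
 * otherwise.
 */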
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
        kvm_s390_vcpu_request_handled(vcpu);
        if (!kvm_request_pending(vcpu))
                return 0;
        /*
         * We use MMU_RELOAD just to re-arm the ipte notifier for the
         * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
         * This ensures that the ipte instruction for this request has
         * already finished. We might race against a second unmapper that
         * wants to set the blocking bit. Let's just retry the request loop.
         */
        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
                int rc;
                rc = gmap_mprotect_notify(vcpu->arch.gmap,
                                          kvm_s390_get_prefix(vcpu),
                                          PAGE_SIZE * 2, PROT_WRITE);
                if (rc) {
                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
                        return rc;
                }
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
                vcpu->arch.sie_block->ihcpu = 0xffff;
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
                if (!ibs_enabled(vcpu)) {
                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
                        kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
                }
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
                if (ibs_enabled(vcpu)) {
                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
                        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
                }
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
                vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
                /*
                 * Disable CMM virtualization; we will emulate the ESSA
                 * instruction manually, in order to provide additional
                 * functionalities needed for live migration.
                 */
                vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
                /*
                 * Re-enable CMM virtualization if CMMA is available and
                 * CMM has been used.
                 */
                if ((vcpu->kvm->arch.use_cmma) &&
                    (vcpu->kvm->mm->context.uses_cmm))
                        vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
                goto retry;
        }

        /* nothing to do, just clear the request */
        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
        /* we left the vsie handler, nothing to do, just clear the request */
        kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

        return 0;
}

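/*
 * Set the guest TOD clock for the whole VM: derive the epoch (and, with
 * the multiple-epoch facility, the epoch index) from the difference
 * between the requested guest TOD and the host TOD, and propagate it to
 * all vcpus while they are blocked out of SIE.
 */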
void kvm_s390_set_tod_clock(struct kvm *kvm,
                            const struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_vcpu *vcpu;
        struct kvm_s390_tod_clock_ext htod;
        int i;

        mutex_lock(&kvm->lock);
        preempt_disable();

        get_tod_clock_ext((char *)&htod);

        kvm->arch.epoch = gtod->tod - htod.tod;
        kvm->arch.epdx = 0;
        if (test_kvm_facility(kvm, 139)) {
                kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
                if (kvm->arch.epoch > gtod->tod)
                        kvm->arch.epdx -= 1;
        }

        kvm_s390_vcpu_block_all(kvm);
        kvm_for_each_vcpu(i, vcpu, kvm) {
                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
                vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
        }

        kvm_s390_vcpu_unblock_all(kvm);
        preempt_enable();
        mutex_unlock(&kvm->lock);
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
        return gmap_fault(vcpu->arch.gmap, gpa,
                          writable ? FAULT_FLAG_WRITE : 0);
}

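/*
 * Tell the guest that a host page fault for the given pfault token has
 * either just started (PFAULT_INIT) or completed (PFAULT_DONE) by
 * injecting the corresponding interrupt.
 */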
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
                                      unsigned long token)
{
        struct kvm_s390_interrupt inti;
        struct kvm_s390_irq irq;

        if (start_token) {
                irq.u.ext.ext_params2 = token;
                irq.type = KVM_S390_INT_PFAULT_INIT;
                WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
        } else {
                inti.type = KVM_S390_INT_PFAULT_DONE;
                inti.parm64 = token;
                WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
        }
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                                     struct kvm_async_pf *work)
{
        trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
        __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                                 struct kvm_async_pf *work)
{
        trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
        __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
        /* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
        /*
         * s390 will always inject the page directly, but we still want
         * check_async_completion to clean up.
         */
        return true;
}

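/*
 * Set up an asynchronous page fault for the current guest access, but
 * only if the guest has pfault enabled and is in a state that allows
 * delivery (PSW mask matches the pfault mask, external interrupts and
 * the service signal subclass are enabled, no interrupt is pending).
 * Returns nonzero if an async page fault was queued.
 */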
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
        hva_t hva;
        struct kvm_arch_async_pf arch;
        int rc;

        if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
                return 0;
        if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
            vcpu->arch.pfault_compare)
                return 0;
        if (psw_extint_disabled(vcpu))
                return 0;
        if (kvm_s390_vcpu_has_irq(vcpu, 0))
                return 0;
        if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
                return 0;
        if (!vcpu->arch.gmap->pfault_enabled)
                return 0;

        hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
        hva += current->thread.gmap_addr & ~PAGE_MASK;
        if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
                return 0;

        rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
        return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
        int rc, cpuflags;

        /*
         * On s390 notifications for arriving pages will be delivered directly
         * to the guest but the housekeeping for completed pfaults is
         * handled outside the worker.
         */
        kvm_check_async_pf_completion(vcpu);

        vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
        vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

        if (need_resched())
                schedule();

        if (test_cpu_flag(CIF_MCCK_PENDING))
                s390_handle_mcck();

        if (!kvm_is_ucontrol(vcpu->kvm)) {
                rc = kvm_s390_deliver_pending_interrupts(vcpu);
                if (rc)
                        return rc;
        }

        rc = kvm_s390_handle_requests(vcpu);
        if (rc)
                return rc;

        if (guestdbg_enabled(vcpu)) {
                kvm_s390_backup_guest_per_regs(vcpu);
                kvm_s390_patch_guest_per_regs(vcpu);
        }

        vcpu->arch.sie_block->icptcode = 0;
        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
        trace_kvm_s390_sie_enter(vcpu, cpuflags);

        return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
        struct kvm_s390_pgm_info pgm_info = {
                .code = PGM_ADDRESSING,
        };
        u8 opcode, ilen;
        int rc;

        VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
        trace_kvm_s390_sie_fault(vcpu);

        /*
         * We want to inject an addressing exception, which is defined as a
         * suppressing or terminating exception. However, since we came here
         * by a DAT access exception, the PSW still points to the faulting
         * instruction since DAT exceptions are nullifying. So we've got
         * to look up the current opcode to get the length of the instruction
         * to be able to forward the PSW.
         */
        rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
        ilen = insn_length(opcode);
        if (rc < 0) {
                return rc;
        } else if (rc) {
                /*
                 * Instruction-Fetching Exceptions - we can't detect the ilen.
                 * Forward by arbitrary ilc, injection will take care of
                 * nullification if necessary.
                 */
                pgm_info = vcpu->arch.pgm;
                ilen = 4;
        }
        pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
        kvm_s390_forward_psw(vcpu, ilen);
        return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

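/*
 * Post-process one SIE exit: hand machine checks, intercepts, ucontrol
 * exits and guest page faults over to their handlers. Returns 0 if the
 * run loop may continue, -EREMOTE if user space must handle the exit
 * (kvm_run has been prepared), or another negative error code.
 */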
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
        struct mcck_volatile_info *mcck_info;
        struct sie_page *sie_page;

        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

        if (guestdbg_enabled(vcpu))
                kvm_s390_restore_guest_per_regs(vcpu);

        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

        if (exit_reason == -EINTR) {
                VCPU_EVENT(vcpu, 3, "%s", "machine check");
                sie_page = container_of(vcpu->arch.sie_block,
                                        struct sie_page, sie_block);
                mcck_info = &sie_page->mcck_info;
                kvm_s390_reinject_machine_check(vcpu, mcck_info);
                return 0;
        }

        if (vcpu->arch.sie_block->icptcode > 0) {
                int rc = kvm_handle_sie_intercept(vcpu);

                if (rc != -EOPNOTSUPP)
                        return rc;
                vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
                vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
                vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
                vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
                return -EREMOTE;
        } else if (exit_reason != -EFAULT) {
                vcpu->stat.exit_null++;
                return 0;
        } else if (kvm_is_ucontrol(vcpu->kvm)) {
                vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
                vcpu->run->s390_ucontrol.trans_exc_code =
                                                current->thread.gmap_addr;
                vcpu->run->s390_ucontrol.pgm_code = 0x10;
                return -EREMOTE;
        } else if (current->thread.gmap_pfault) {
                trace_kvm_s390_major_guest_pfault(vcpu);
                current->thread.gmap_pfault = 0;
                if (kvm_arch_setup_async_pf(vcpu))
                        return 0;
                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
        }
        return vcpu_post_run_fault_in_sie(vcpu);
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
        int rc, exit_reason;

        /*
         * We try to hold kvm->srcu during most of vcpu_run (except when
         * running the guest), so that memslots (and other stuff) are protected.
         */
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        do {
                rc = vcpu_pre_run(vcpu);
                if (rc)
                        break;

                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
                /*
                 * As PF_VCPU will be used in the fault handler, there must be
                 * no uaccess between guest_enter and guest_exit.
                 */
                local_irq_disable();
                guest_enter_irqoff();
                __disable_cpu_timer_accounting(vcpu);
                local_irq_enable();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                local_irq_disable();
                __enable_cpu_timer_accounting(vcpu);
                guest_exit_irqoff();
                local_irq_enable();
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

                rc = vcpu_post_run(vcpu, exit_reason);
        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
}

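/*
 * Transfer register state that user space may have changed from kvm_run
 * into the vcpu before entering SIE: PSW, prefix, control registers,
 * timers and pfault state, plus lazy enablement of RI and GS when user
 * space handed over valid control blocks. Also switch the host floating
 * point/vector and access register context for the guest one.
 */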
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        struct runtime_instr_cb *riccb;
        struct gs_cb *gscb;

        riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
                /* some control register changes require a tlb flush */
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
                        kvm_clear_async_pf_completion_queue(vcpu);
        }
        /*
         * If userspace sets the riccb (e.g. after migration) to a valid state,
         * we should enable RI here instead of doing the lazy enablement.
         */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
            test_kvm_facility(vcpu->kvm, 64) &&
            riccb->v &&
            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
        }
        /*
         * If userspace sets the gscb (e.g. after migration) to non-zero,
         * we should enable GS here instead of doing the lazy enablement.
         */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
            test_kvm_facility(vcpu->kvm, 133) &&
            gscb->gssm &&
            !vcpu->arch.gs_enabled) {
                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
                vcpu->arch.sie_block->ecb |= ECB_GS;
                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
                vcpu->arch.gs_enabled = 1;
        }
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
            test_kvm_facility(vcpu->kvm, 82)) {
                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
                vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
        }
        save_access_regs(vcpu->arch.host_acrs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        /* save host (userspace) fprs/vrs */
        save_fpu_regs();
        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
        if (MACHINE_HAS_VX)
                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
        else
                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;
        if (MACHINE_HAS_GS) {
                preempt_disable();
                __ctl_set_bit(2, 4);
                if (current->thread.gs_cb) {
                        vcpu->arch.host_gscb = current->thread.gs_cb;
                        save_gs_cb(vcpu->arch.host_gscb);
                }
                if (vcpu->arch.gs_enabled) {
                        current->thread.gs_cb = (struct gs_cb *)
                                                &vcpu->run->s.regs.gscb;
                        restore_gs_cb(current->thread.gs_cb);
                }
                preempt_enable();
        }
        /* SIE will load etoken directly from SDNX and therefore kvm_run */

        kvm_run->kvm_dirty_regs = 0;
}

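/*
 * Counterpart of sync_regs(): copy the current vcpu register state back
 * into kvm_run for user space and restore the host register context.
 */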
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
        kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
        kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
        kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
        kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
        kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
        /* Save guest register state */
        save_fpu_regs();
        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        /* Restore will be done lazily at return */
        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
        if (MACHINE_HAS_GS) {
                __ctl_set_bit(2, 4);
                if (vcpu->arch.gs_enabled)
                        save_gs_cb(current->thread.gs_cb);
                preempt_disable();
                current->thread.gs_cb = vcpu->arch.host_gscb;
                restore_gs_cb(vcpu->arch.host_gscb);
                preempt_enable();
                if (!vcpu->arch.host_gscb)
                        __ctl_clear_bit(2, 4);
                vcpu->arch.host_gscb = NULL;
        }
        /* SIE will save etoken directly into SDNX and therefore kvm_run */
}

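/*
 * The KVM_RUN ioctl: synchronize register state, run the vcpu in the SIE
 * loop until an exit that user space has to handle (or a signal or debug
 * exit) occurs, and store the resulting state back into kvm_run.
 */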
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        int rc;

        if (kvm_run->immediate_exit)
                return -EINTR;

        vcpu_load(vcpu);

        if (guestdbg_exit_pending(vcpu)) {
                kvm_s390_prepare_debug_exit(vcpu);
                rc = 0;
                goto out;
        }

        kvm_sigset_activate(vcpu);

        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
                kvm_s390_vcpu_start(vcpu);
        } else if (is_vcpu_stopped(vcpu)) {
                pr_err_ratelimited("can't run stopped vcpu %d\n",
                                   vcpu->vcpu_id);
                rc = -EINVAL;
                goto out;
        }

        sync_regs(vcpu, kvm_run);
        enable_cpu_timer_accounting(vcpu);

        might_fault();
        rc = __vcpu_run(vcpu);

        if (signal_pending(current) && !rc) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
                rc = -EINTR;
        }

        if (guestdbg_exit_pending(vcpu) && !rc) {
                kvm_s390_prepare_debug_exit(vcpu);
                rc = 0;
        }

        if (rc == -EREMOTE) {
                /* userspace support is needed, kvm_run has been prepared */
                rc = 0;
        }

        disable_cpu_timer_accounting(vcpu);
        store_regs(vcpu, kvm_run);

        kvm_sigset_deactivate(vcpu);

        vcpu->stat.exit_userspace++;
out:
        vcpu_put(vcpu);
        return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
        unsigned char archmode = 1;
        freg_t fprs[NUM_FPRS];
        unsigned int px;
        u64 clkcomp, cputm;
        int rc;

        px = kvm_s390_get_prefix(vcpu);
        if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
                if (write_guest_abs(vcpu, 163, &archmode, 1))
                        return -EFAULT;
                gpa = 0;
        } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
                if (write_guest_real(vcpu, 163, &archmode, 1))
                        return -EFAULT;
                gpa = px;
        } else {
                gpa -= __LC_FPREGS_SAVE_AREA;
        }

        /* manually convert vector registers if necessary */
        if (MACHINE_HAS_VX) {
                convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
                                     fprs, 128);
        } else {
                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
                                     vcpu->run->s.regs.fprs, 128);
        }
        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
                              vcpu->run->s.regs.gprs, 128);
        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
                              &vcpu->arch.sie_block->gpsw, 16);
        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
                              &px, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
                              &vcpu->run->s.regs.fpc, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
                              &vcpu->arch.sie_block->todpr, 4);
        cputm = kvm_s390_get_cpu_timer(vcpu);
        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
                              &cputm, 8);
        clkcomp = vcpu->arch.sie_block->ckc >> 8;
        rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
                              &clkcomp, 8);
        rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
                              &vcpu->run->s.regs.acrs, 64);
        rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
                              &vcpu->arch.sie_block->gcr, 128);
        return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
        /*
         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
         * switch in the run ioctl. Let's update our copies before we save
         * them into the save area.
         */
        save_fpu_regs();
        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        save_access_regs(vcpu->run->s.regs.acrs);

        return kvm_s390_store_status_unloaded(vcpu, addr);
}

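/*
 * The IBS facility speeds up a vcpu if it is the only one running in the
 * VM. It must therefore only be enabled while a single vcpu is started
 * and has to be disabled again as soon as a second one starts; the
 * helpers below toggle it via synchronous requests.
 */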
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                __disable_ibs_on_vcpu(vcpu);
        }
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        if (!sclp.has_ibs)
                return;
        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
        int i, online_vcpus, started_vcpus = 0;

        if (!is_vcpu_stopped(vcpu))
                return;

        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
        /* Only one cpu at a time may enter/leave the STOPPED state. */
        spin_lock(&vcpu->kvm->arch.start_stop_lock);
        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

        for (i = 0; i < online_vcpus; i++) {
                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
                        started_vcpus++;
        }

        if (started_vcpus == 0) {
                /* we're the only active VCPU -> speed it up */
                __enable_ibs_on_vcpu(vcpu);
        } else if (started_vcpus == 1) {
                /*
                 * As we are starting a second VCPU, we have to disable
                 * the IBS facility on all VCPUs to remove potentially
                 * outstanding ENABLE requests.
                 */
                __disable_ibs_on_all_vcpus(vcpu->kvm);
        }

        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
        /*
         * Another VCPU might have used IBS while we were offline.
         * Let's play safe and flush the VCPU at startup.
         */
        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
        int i, online_vcpus, started_vcpus = 0;
        struct kvm_vcpu *started_vcpu = NULL;

        if (is_vcpu_stopped(vcpu))
                return;

        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
        /* Only one cpu at a time may enter/leave the STOPPED state. */
        spin_lock(&vcpu->kvm->arch.start_stop_lock);
        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

        /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
        kvm_s390_clear_stop_irq(vcpu);

        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
        __disable_ibs_on_vcpu(vcpu);

        for (i = 0; i < online_vcpus; i++) {
                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
                        started_vcpus++;
                        started_vcpu = vcpu->kvm->vcpus[i];
                }
        }

        if (started_vcpus == 1) {
                /*
                 * As we only have one VCPU left, we want to enable the
                 * IBS facility for that VCPU to speed it up.
                 */
                __enable_ibs_on_vcpu(started_vcpu);
        }

        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                                     struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_CSS_SUPPORT:
                if (!vcpu->kvm->arch.css_support) {
                        vcpu->kvm->arch.css_support = 1;
                        VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
                        trace_kvm_s390_enable_css(vcpu->kvm);
                }
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

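/*
 * Read from or write to guest memory on behalf of user space
 * (KVM_S390_MEM_OP), using logical addresses in the given address space;
 * with the CHECK_ONLY flag only the access permissions are verified.
 */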
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
                                  struct kvm_s390_mem_op *mop)
{
        void __user *uaddr = (void __user *)mop->buf;
        void *tmpbuf = NULL;
        int r, srcu_idx;
        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
                                    | KVM_S390_MEMOP_F_CHECK_ONLY;

        if (mop->flags & ~supported_flags)
                return -EINVAL;

        if (mop->size > MEM_OP_MAX_SIZE)
                return -E2BIG;

        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
                tmpbuf = vmalloc(mop->size);
                if (!tmpbuf)
                        return -ENOMEM;
        }

        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        switch (mop->op) {
        case KVM_S390_MEMOP_LOGICAL_READ:
                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
                                            mop->size, GACC_FETCH);
                        break;
                }
                r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
                if (r == 0) {
                        if (copy_to_user(uaddr, tmpbuf, mop->size))
                                r = -EFAULT;
                }
                break;
        case KVM_S390_MEMOP_LOGICAL_WRITE:
                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
                        r = check_gva_range(vcpu, mop->gaddr, mop->ar,
                                            mop->size, GACC_STORE);
                        break;
                }
                if (copy_from_user(tmpbuf, uaddr, mop->size)) {
                        r = -EFAULT;
                        break;
                }
                r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
                break;
        default:
                r = -EINVAL;
        }

        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

        vfree(tmpbuf);
        return r;
}

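/*
 * Interrupt injection may be requested asynchronously, i.e. without the
 * vcpu mutex held; every other ioctl falls through to the regular vcpu
 * ioctl handler below.
 */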
long kvm_arch_vcpu_async_ioctl(struct file *filp,
                               unsigned int ioctl, unsigned long arg)
{
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;

        switch (ioctl) {
        case KVM_S390_IRQ: {
                struct kvm_s390_irq s390irq;

                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
                        return -EFAULT;
                return kvm_s390_inject_vcpu(vcpu, &s390irq);
        }
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;
                struct kvm_s390_irq s390irq;

                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        return -EFAULT;
                if (s390int_to_s390irq(&s390int, &s390irq))
                        return -EINVAL;
                return kvm_s390_inject_vcpu(vcpu, &s390irq);
        }
        }
        return -ENOIOCTLCMD;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
{
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
        int idx;
        long r;

        vcpu_load(vcpu);

        switch (ioctl) {
        case KVM_S390_STORE_STATUS:
                idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_s390_vcpu_store_status(vcpu, arg);
                srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        case KVM_S390_SET_INITIAL_PSW: {
                psw_t psw;

                r = -EFAULT;
                if (copy_from_user(&psw, argp, sizeof(psw)))
                        break;
                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
                break;
        }
        case KVM_S390_INITIAL_RESET:
                r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
                break;
        case KVM_SET_ONE_REG:
        case KVM_GET_ONE_REG: {
                struct kvm_one_reg reg;

                r = -EFAULT;
                if (copy_from_user(&reg, argp, sizeof(reg)))
                        break;
                if (ioctl == KVM_SET_ONE_REG)
                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
                else
                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
                break;
        }
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_S390_UCAS_MAP: {
                struct kvm_s390_ucas_mapping ucasmap;

                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
                        r = -EFAULT;
                        break;
                }

                if (!kvm_is_ucontrol(vcpu->kvm)) {
                        r = -EINVAL;
                        break;
                }

                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
                                     ucasmap.vcpu_addr, ucasmap.length);
                break;
        }
        case KVM_S390_UCAS_UNMAP: {
                struct kvm_s390_ucas_mapping ucasmap;

                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
                        r = -EFAULT;
                        break;
                }

                if (!kvm_is_ucontrol(vcpu->kvm)) {
                        r = -EINVAL;
                        break;
                }

                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
                        ucasmap.length);
                break;
        }
#endif
        case KVM_S390_VCPU_FAULT: {
                r = gmap_fault(vcpu->arch.gmap, arg, 0);
                break;
        }
        case KVM_ENABLE_CAP:
        {
                struct kvm_enable_cap cap;

                r = -EFAULT;
                if (copy_from_user(&cap, argp, sizeof(cap)))
                        break;
                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
                break;
        }
        case KVM_S390_MEM_OP: {
                struct kvm_s390_mem_op mem_op;

                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
                        r = kvm_s390_guest_mem_op(vcpu, &mem_op);
                else
                        r = -EFAULT;
                break;
        }
        case KVM_S390_SET_IRQ_STATE: {
                struct kvm_s390_irq_state irq_state;

                r = -EFAULT;
                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
                        break;
                if (irq_state.len > VCPU_IRQS_MAX_BUF ||
                    irq_state.len == 0 ||
                    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
                        r = -EINVAL;
                        break;
                }
                /* do not use irq_state.flags, it will break old QEMUs */
                r = kvm_s390_set_irq_state(vcpu,
                                           (void __user *) irq_state.buf,
                                           irq_state.len);
                break;
        }
        case KVM_S390_GET_IRQ_STATE: {
                struct kvm_s390_irq_state irq_state;

                r = -EFAULT;
                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
                        break;
                if (irq_state.len == 0) {
                        r = -EINVAL;
                        break;
                }
                /* do not use irq_state.flags, it will break old QEMUs */
                r = kvm_s390_get_irq_state(vcpu,
                                           (__u8 __user *) irq_state.buf,
                                           irq_state.len);
                break;
        }
        default:
                r = -ENOTTY;
        }

        vcpu_put(vcpu);
        return r;
}

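/*
 * mmap() on a vcpu fd is only supported for ucontrol VMs, which use it
 * to map the hardware SIE control block into user space.
 */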
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
             && (kvm_is_ucontrol(vcpu->kvm))) {
                vmf->page = virt_to_page(vcpu->arch.sie_block);
                get_page(vmf->page);
                return 0;
        }
#endif
        return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
{
        return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
                                   const struct kvm_userspace_memory_region *mem,
                                   enum kvm_mr_change change)
{
        /*
         * A few sanity checks: memory slots have to start and end on a
         * segment boundary (1MB). The memory in userland may be fragmented
         * into various different vmas, and it is okay to mmap() and munmap()
         * in this slot after doing this call at any time.
         */

        if (mem->userspace_addr & 0xffffful)
                return -EINVAL;

        if (mem->memory_size & 0xffffful)
                return -EINVAL;

        if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
                return -EINVAL;

        return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
                                const struct kvm_userspace_memory_region *mem,
                                const struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
{
        int rc;

        /*
         * If the basics of the memslot do not change, we do not want
         * to update the gmap. Every update causes several unnecessary
         * segment translation exceptions. This is usually handled just
         * fine by the normal fault handler + gmap, but it will also
         * cause faults on the prefix page of running guest CPUs.
         */
        if (old->userspace_addr == mem->userspace_addr &&
            old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
            old->npages * PAGE_SIZE == mem->memory_size)
                return;

        rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
                mem->guest_phys_addr, mem->memory_size);
        if (rc)
                pr_warn("failed to commit memory region\n");
}

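/*
 * sclp.hmfai describes, in steps of two bits per facility-list
 * doubleword, how many facility bits are hypervisor-managed;
 * nonhyp_mask() converts that encoding into a mask of the remaining,
 * non-hypervisor-managed bits of doubleword i.
 */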
static inline unsigned long nonhyp_mask(int i)
{
        unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
        vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
        int i;

        if (!sclp.has_sief2) {
                pr_info("SIE not available\n");
                return -ENODEV;
        }

        if (nested && hpage) {
                pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
                return -EINVAL;
        }

        for (i = 0; i < 16; i++)
                kvm_s390_fac_base[i] |=
                        S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
        kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");