KVM: s390: add SPDX identifiers to the remaining files
[linux-2.6-block.git] / arch / s390 / kvm / kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting zSeries kernel virtual machines
4  *
5  * Copyright IBM Corp. 2008, 2009
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License (version 2 only)
9  * as published by the Free Software Foundation.
10  *
11  *    Author(s): Carsten Otte <cotte@de.ibm.com>
12  *               Christian Borntraeger <borntraeger@de.ibm.com>
13  *               Heiko Carstens <heiko.carstens@de.ibm.com>
14  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
15  *               Jason J. Herne <jjherne@us.ibm.com>
16  */
17
18 #include <linux/compiler.h>
19 #include <linux/err.h>
20 #include <linux/fs.h>
21 #include <linux/hrtimer.h>
22 #include <linux/init.h>
23 #include <linux/kvm.h>
24 #include <linux/kvm_host.h>
25 #include <linux/mman.h>
26 #include <linux/module.h>
27 #include <linux/moduleparam.h>
28 #include <linux/random.h>
29 #include <linux/slab.h>
30 #include <linux/timer.h>
31 #include <linux/vmalloc.h>
32 #include <linux/bitmap.h>
33 #include <linux/sched/signal.h>
34 #include <linux/string.h>
35
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/pgtable.h>
40 #include <asm/gmap.h>
41 #include <asm/nmi.h>
42 #include <asm/switch_to.h>
43 #include <asm/isc.h>
44 #include <asm/sclp.h>
45 #include <asm/cpacf.h>
46 #include <asm/timex.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49
50 #define KMSG_COMPONENT "kvm-s390"
51 #undef pr_fmt
52 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
53
54 #define CREATE_TRACE_POINTS
55 #include "trace.h"
56 #include "trace-s390.h"
57
#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
/* Added to KVM_MAX_VCPUS when sizing VCPU_IRQS_MAX_BUF below. */
#define LOCAL_IRQS 32
/* Size of a buffer holding (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries. */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Expands to TWO comma-separated initializer values: the offset of stat.x
 * inside struct kvm_vcpu, followed by KVM_STAT_VCPU.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
64
/*
 * Table of per-vcpu statistics. Each entry pairs a name string with the
 * VCPU_STAT() initializers (offset of the counter inside struct kvm_vcpu
 * and KVM_STAT_VCPU). The table is terminated by a { NULL } entry.
 * NOTE(review): the consumer of this table is not visible in this file;
 * presumably the generic KVM code turns it into debugfs files - confirm.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};
133
/*
 * Packed 16-byte layout: one epoch index byte, a 64-bit TOD value and
 * seven reserved bytes (1 + 8 + 7).
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably the in-memory format of an extended TOD clock value -
 * confirm against its users.
 */
struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;
139
/* allow nested virtualization in KVM (if enabled by user space) */
/*
 * Evaluated in kvm_s390_cpu_feat_init(): while this is 0, none of the
 * SIE related cpu features (SIEF2, 64BSCAO, SIIF, ...) are made available.
 */
static int nested;
/* S_IRUGO: the parameter is exported read-only */
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
147
148 unsigned long kvm_s390_fac_list_mask_size(void)
149 {
150         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
151         return ARRAY_SIZE(kvm_s390_fac_list_mask);
152 }
153
/* available cpu features supported by kvm */
/* bits are set through allow_cpu_feat() from kvm_s390_cpu_feat_init() */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* pte notifiers; callbacks are assigned and registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug area handle; registered in kvm_arch_init(), unregistered in kvm_arch_exit() */
debug_info_t *kvm_s390_dbf;
162
/* Section: not file related */

/*
 * Nothing has to be switched on here: every s390 is virtualization
 * enabled, so this always reports success (0).
 */
int kvm_arch_hardware_enable(void)
{
        return 0;
}
169
170 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
171                               unsigned long end);
172
173 /*
174  * This callback is executed during stop_machine(). All CPUs are therefore
175  * temporarily stopped. In order not to change guest behavior, we have to
176  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
177  * so a CPU won't be stopped while calculating with the epoch.
178  */
179 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
180                           void *v)
181 {
182         struct kvm *kvm;
183         struct kvm_vcpu *vcpu;
184         int i;
185         unsigned long long *delta = v;
186
187         list_for_each_entry(kvm, &vm_list, vm_list) {
188                 kvm->arch.epoch -= *delta;
189                 kvm_for_each_vcpu(i, vcpu, kvm) {
190                         vcpu->arch.sie_block->epoch -= *delta;
191                         if (vcpu->arch.cputm_enabled)
192                                 vcpu->arch.cputm_start += *delta;
193                         if (vcpu->arch.vsie_block)
194                                 vcpu->arch.vsie_block->epoch -= *delta;
195                 }
196         }
197         return NOTIFY_OK;
198 }
199
/*
 * Added to / removed from the s390_epoch_delta_notifier chain in
 * kvm_arch_hardware_setup() / kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};
203
204 int kvm_arch_hardware_setup(void)
205 {
206         gmap_notifier.notifier_call = kvm_gmap_notifier;
207         gmap_register_pte_notifier(&gmap_notifier);
208         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
209         gmap_register_pte_notifier(&vsie_gmap_notifier);
210         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
211                                        &kvm_clock_notifier);
212         return 0;
213 }
214
215 void kvm_arch_hardware_unsetup(void)
216 {
217         gmap_unregister_pte_notifier(&gmap_notifier);
218         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
219         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
220                                          &kvm_clock_notifier);
221 }
222
223 static void allow_cpu_feat(unsigned long nr)
224 {
225         set_bit_inv(nr, kvm_s390_available_cpu_feat);
226 }
227
/*
 * Execute PLO for function code @nr in its "test bit" form and report
 * the outcome.
 *
 * Returns 1 if the resulting condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
        /* general register 0 carries the function code with 0x100 or'ed in */
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                /* fetch the condition code into cc (ipm, then shift right by 28) */
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}
243
244 static void kvm_s390_cpu_feat_init(void)
245 {
246         int i;
247
248         for (i = 0; i < 256; ++i) {
249                 if (plo_test_bit(i))
250                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
251         }
252
253         if (test_facility(28)) /* TOD-clock steering */
254                 ptff(kvm_s390_available_subfunc.ptff,
255                      sizeof(kvm_s390_available_subfunc.ptff),
256                      PTFF_QAF);
257
258         if (test_facility(17)) { /* MSA */
259                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
260                               kvm_s390_available_subfunc.kmac);
261                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
262                               kvm_s390_available_subfunc.kmc);
263                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
264                               kvm_s390_available_subfunc.km);
265                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
266                               kvm_s390_available_subfunc.kimd);
267                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
268                               kvm_s390_available_subfunc.klmd);
269         }
270         if (test_facility(76)) /* MSA3 */
271                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
272                               kvm_s390_available_subfunc.pckmo);
273         if (test_facility(77)) { /* MSA4 */
274                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
275                               kvm_s390_available_subfunc.kmctr);
276                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
277                               kvm_s390_available_subfunc.kmf);
278                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
279                               kvm_s390_available_subfunc.kmo);
280                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
281                               kvm_s390_available_subfunc.pcc);
282         }
283         if (test_facility(57)) /* MSA5 */
284                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
285                               kvm_s390_available_subfunc.ppno);
286
287         if (test_facility(146)) /* MSA8 */
288                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
289                               kvm_s390_available_subfunc.kma);
290
291         if (MACHINE_HAS_ESOP)
292                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
293         /*
294          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
295          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
296          */
297         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
298             !test_facility(3) || !nested)
299                 return;
300         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
301         if (sclp.has_64bscao)
302                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
303         if (sclp.has_siif)
304                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
305         if (sclp.has_gpere)
306                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
307         if (sclp.has_gsls)
308                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
309         if (sclp.has_ib)
310                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
311         if (sclp.has_cei)
312                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
313         if (sclp.has_ibs)
314                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
315         if (sclp.has_kss)
316                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
317         /*
318          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
319          * all skey handling functions read/set the skey from the PGSTE
320          * instead of the real storage key.
321          *
322          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
323          * pages being detected as preserved although they are resident.
324          *
325          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
326          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
327          *
328          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
329          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
330          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
331          *
332          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
333          * cannot easily shadow the SCA because of the ipte lock.
334          */
335 }
336
337 int kvm_arch_init(void *opaque)
338 {
339         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
340         if (!kvm_s390_dbf)
341                 return -ENOMEM;
342
343         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
344                 debug_unregister(kvm_s390_dbf);
345                 return -ENOMEM;
346         }
347
348         kvm_s390_cpu_feat_init();
349
350         /* Register floating interrupt controller interface. */
351         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
352 }
353
354 void kvm_arch_exit(void)
355 {
356         debug_unregister(kvm_s390_dbf);
357 }
358
359 /* Section: device related */
360 long kvm_arch_dev_ioctl(struct file *filp,
361                         unsigned int ioctl, unsigned long arg)
362 {
363         if (ioctl == KVM_S390_ENABLE_SIE)
364                 return s390_enable_sie();
365         return -EINVAL;
366 }
367
368 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
369 {
370         int r;
371
372         switch (ext) {
373         case KVM_CAP_S390_PSW:
374         case KVM_CAP_S390_GMAP:
375         case KVM_CAP_SYNC_MMU:
376 #ifdef CONFIG_KVM_S390_UCONTROL
377         case KVM_CAP_S390_UCONTROL:
378 #endif
379         case KVM_CAP_ASYNC_PF:
380         case KVM_CAP_SYNC_REGS:
381         case KVM_CAP_ONE_REG:
382         case KVM_CAP_ENABLE_CAP:
383         case KVM_CAP_S390_CSS_SUPPORT:
384         case KVM_CAP_IOEVENTFD:
385         case KVM_CAP_DEVICE_CTRL:
386         case KVM_CAP_ENABLE_CAP_VM:
387         case KVM_CAP_S390_IRQCHIP:
388         case KVM_CAP_VM_ATTRIBUTES:
389         case KVM_CAP_MP_STATE:
390         case KVM_CAP_IMMEDIATE_EXIT:
391         case KVM_CAP_S390_INJECT_IRQ:
392         case KVM_CAP_S390_USER_SIGP:
393         case KVM_CAP_S390_USER_STSI:
394         case KVM_CAP_S390_SKEYS:
395         case KVM_CAP_S390_IRQ_STATE:
396         case KVM_CAP_S390_USER_INSTR0:
397         case KVM_CAP_S390_CMMA_MIGRATION:
398         case KVM_CAP_S390_AIS:
399         case KVM_CAP_S390_AIS_MIGRATION:
400                 r = 1;
401                 break;
402         case KVM_CAP_S390_MEM_OP:
403                 r = MEM_OP_MAX_SIZE;
404                 break;
405         case KVM_CAP_NR_VCPUS:
406         case KVM_CAP_MAX_VCPUS:
407                 r = KVM_S390_BSCA_CPU_SLOTS;
408                 if (!kvm_s390_use_sca_entries())
409                         r = KVM_MAX_VCPUS;
410                 else if (sclp.has_esca && sclp.has_64bscao)
411                         r = KVM_S390_ESCA_CPU_SLOTS;
412                 break;
413         case KVM_CAP_NR_MEMSLOTS:
414                 r = KVM_USER_MEM_SLOTS;
415                 break;
416         case KVM_CAP_S390_COW:
417                 r = MACHINE_HAS_ESOP;
418                 break;
419         case KVM_CAP_S390_VECTOR_REGISTERS:
420                 r = MACHINE_HAS_VX;
421                 break;
422         case KVM_CAP_S390_RI:
423                 r = test_facility(64);
424                 break;
425         case KVM_CAP_S390_GS:
426                 r = test_facility(133);
427                 break;
428         default:
429                 r = 0;
430         }
431         return r;
432 }
433
434 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
435                                         struct kvm_memory_slot *memslot)
436 {
437         gfn_t cur_gfn, last_gfn;
438         unsigned long address;
439         struct gmap *gmap = kvm->arch.gmap;
440
441         /* Loop over all guest pages */
442         last_gfn = memslot->base_gfn + memslot->npages;
443         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
444                 address = gfn_to_hva_memslot(memslot, cur_gfn);
445
446                 if (test_and_clear_guest_dirty(gmap->mm, address))
447                         mark_page_dirty(kvm, cur_gfn);
448                 if (fatal_signal_pending(current))
449                         return;
450                 cond_resched();
451         }
452 }
453
454 /* Section: vm related */
455 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
456
457 /*
458  * Get (and clear) the dirty memory log for a memory slot.
459  */
460 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
461                                struct kvm_dirty_log *log)
462 {
463         int r;
464         unsigned long n;
465         struct kvm_memslots *slots;
466         struct kvm_memory_slot *memslot;
467         int is_dirty = 0;
468
469         if (kvm_is_ucontrol(kvm))
470                 return -EINVAL;
471
472         mutex_lock(&kvm->slots_lock);
473
474         r = -EINVAL;
475         if (log->slot >= KVM_USER_MEM_SLOTS)
476                 goto out;
477
478         slots = kvm_memslots(kvm);
479         memslot = id_to_memslot(slots, log->slot);
480         r = -ENOENT;
481         if (!memslot->dirty_bitmap)
482                 goto out;
483
484         kvm_s390_sync_dirty_log(kvm, memslot);
485         r = kvm_get_dirty_log(kvm, log, &is_dirty);
486         if (r)
487                 goto out;
488
489         /* Clear the dirty log */
490         if (is_dirty) {
491                 n = kvm_dirty_bitmap_bytes(memslot);
492                 memset(memslot->dirty_bitmap, 0, n);
493         }
494         r = 0;
495 out:
496         mutex_unlock(&kvm->slots_lock);
497         return r;
498 }
499
500 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
501 {
502         unsigned int i;
503         struct kvm_vcpu *vcpu;
504
505         kvm_for_each_vcpu(i, vcpu, kvm) {
506                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
507         }
508 }
509
510 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
511 {
512         int r;
513
514         if (cap->flags)
515                 return -EINVAL;
516
517         switch (cap->cap) {
518         case KVM_CAP_S390_IRQCHIP:
519                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
520                 kvm->arch.use_irqchip = 1;
521                 r = 0;
522                 break;
523         case KVM_CAP_S390_USER_SIGP:
524                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
525                 kvm->arch.user_sigp = 1;
526                 r = 0;
527                 break;
528         case KVM_CAP_S390_VECTOR_REGISTERS:
529                 mutex_lock(&kvm->lock);
530                 if (kvm->created_vcpus) {
531                         r = -EBUSY;
532                 } else if (MACHINE_HAS_VX) {
533                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
534                         set_kvm_facility(kvm->arch.model.fac_list, 129);
535                         if (test_facility(134)) {
536                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
537                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
538                         }
539                         if (test_facility(135)) {
540                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
541                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
542                         }
543                         r = 0;
544                 } else
545                         r = -EINVAL;
546                 mutex_unlock(&kvm->lock);
547                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
548                          r ? "(not available)" : "(success)");
549                 break;
550         case KVM_CAP_S390_RI:
551                 r = -EINVAL;
552                 mutex_lock(&kvm->lock);
553                 if (kvm->created_vcpus) {
554                         r = -EBUSY;
555                 } else if (test_facility(64)) {
556                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
557                         set_kvm_facility(kvm->arch.model.fac_list, 64);
558                         r = 0;
559                 }
560                 mutex_unlock(&kvm->lock);
561                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
562                          r ? "(not available)" : "(success)");
563                 break;
564         case KVM_CAP_S390_AIS:
565                 mutex_lock(&kvm->lock);
566                 if (kvm->created_vcpus) {
567                         r = -EBUSY;
568                 } else {
569                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
570                         set_kvm_facility(kvm->arch.model.fac_list, 72);
571                         r = 0;
572                 }
573                 mutex_unlock(&kvm->lock);
574                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
575                          r ? "(not available)" : "(success)");
576                 break;
577         case KVM_CAP_S390_GS:
578                 r = -EINVAL;
579                 mutex_lock(&kvm->lock);
580                 if (atomic_read(&kvm->online_vcpus)) {
581                         r = -EBUSY;
582                 } else if (test_facility(133)) {
583                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
584                         set_kvm_facility(kvm->arch.model.fac_list, 133);
585                         r = 0;
586                 }
587                 mutex_unlock(&kvm->lock);
588                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
589                          r ? "(not available)" : "(success)");
590                 break;
591         case KVM_CAP_S390_USER_STSI:
592                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
593                 kvm->arch.user_stsi = 1;
594                 r = 0;
595                 break;
596         case KVM_CAP_S390_USER_INSTR0:
597                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
598                 kvm->arch.user_instr0 = 1;
599                 icpt_operexc_on_all_vcpus(kvm);
600                 r = 0;
601                 break;
602         default:
603                 r = -EINVAL;
604                 break;
605         }
606         return r;
607 }
608
609 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
610 {
611         int ret;
612
613         switch (attr->attr) {
614         case KVM_S390_VM_MEM_LIMIT_SIZE:
615                 ret = 0;
616                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
617                          kvm->arch.mem_limit);
618                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
619                         ret = -EFAULT;
620                 break;
621         default:
622                 ret = -ENXIO;
623                 break;
624         }
625         return ret;
626 }
627
628 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
629 {
630         int ret;
631         unsigned int idx;
632         switch (attr->attr) {
633         case KVM_S390_VM_MEM_ENABLE_CMMA:
634                 ret = -ENXIO;
635                 if (!sclp.has_cmma)
636                         break;
637
638                 ret = -EBUSY;
639                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
640                 mutex_lock(&kvm->lock);
641                 if (!kvm->created_vcpus) {
642                         kvm->arch.use_cmma = 1;
643                         ret = 0;
644                 }
645                 mutex_unlock(&kvm->lock);
646                 break;
647         case KVM_S390_VM_MEM_CLR_CMMA:
648                 ret = -ENXIO;
649                 if (!sclp.has_cmma)
650                         break;
651                 ret = -EINVAL;
652                 if (!kvm->arch.use_cmma)
653                         break;
654
655                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
656                 mutex_lock(&kvm->lock);
657                 idx = srcu_read_lock(&kvm->srcu);
658                 s390_reset_cmma(kvm->arch.gmap->mm);
659                 srcu_read_unlock(&kvm->srcu, idx);
660                 mutex_unlock(&kvm->lock);
661                 ret = 0;
662                 break;
663         case KVM_S390_VM_MEM_LIMIT_SIZE: {
664                 unsigned long new_limit;
665
666                 if (kvm_is_ucontrol(kvm))
667                         return -EINVAL;
668
669                 if (get_user(new_limit, (u64 __user *)attr->addr))
670                         return -EFAULT;
671
672                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
673                     new_limit > kvm->arch.mem_limit)
674                         return -E2BIG;
675
676                 if (!new_limit)
677                         return -EINVAL;
678
679                 /* gmap_create takes last usable address */
680                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
681                         new_limit -= 1;
682
683                 ret = -EBUSY;
684                 mutex_lock(&kvm->lock);
685                 if (!kvm->created_vcpus) {
686                         /* gmap_create will round the limit up */
687                         struct gmap *new = gmap_create(current->mm, new_limit);
688
689                         if (!new) {
690                                 ret = -ENOMEM;
691                         } else {
692                                 gmap_remove(kvm->arch.gmap);
693                                 new->private = kvm;
694                                 kvm->arch.gmap = new;
695                                 ret = 0;
696                         }
697                 }
698                 mutex_unlock(&kvm->lock);
699                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
700                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
701                          (void *) kvm->arch.gmap->asce);
702                 break;
703         }
704         default:
705                 ret = -ENXIO;
706                 break;
707         }
708         return ret;
709 }
710
711 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
712
713 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
714 {
715         struct kvm_vcpu *vcpu;
716         int i;
717
718         if (!test_kvm_facility(kvm, 76))
719                 return -EINVAL;
720
721         mutex_lock(&kvm->lock);
722         switch (attr->attr) {
723         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
724                 get_random_bytes(
725                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
726                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
727                 kvm->arch.crypto.aes_kw = 1;
728                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
729                 break;
730         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
731                 get_random_bytes(
732                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
733                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
734                 kvm->arch.crypto.dea_kw = 1;
735                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
736                 break;
737         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
738                 kvm->arch.crypto.aes_kw = 0;
739                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
740                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
741                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
742                 break;
743         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
744                 kvm->arch.crypto.dea_kw = 0;
745                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
746                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
747                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
748                 break;
749         default:
750                 mutex_unlock(&kvm->lock);
751                 return -ENXIO;
752         }
753
754         kvm_for_each_vcpu(i, vcpu, kvm) {
755                 kvm_s390_vcpu_crypto_setup(vcpu);
756                 exit_sie(vcpu);
757         }
758         mutex_unlock(&kvm->lock);
759         return 0;
760 }
761
762 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
763 {
764         int cx;
765         struct kvm_vcpu *vcpu;
766
767         kvm_for_each_vcpu(cx, vcpu, kvm)
768                 kvm_s390_sync_request(req, vcpu);
769 }
770
771 /*
772  * Must be called with kvm->srcu held to avoid races on memslots, and with
773  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
774  */
775 static int kvm_s390_vm_start_migration(struct kvm *kvm)
776 {
777         struct kvm_s390_migration_state *mgs;
778         struct kvm_memory_slot *ms;
779         /* should be the only one */
780         struct kvm_memslots *slots;
781         unsigned long ram_pages;
782         int slotnr;
783
784         /* migration mode already enabled */
785         if (kvm->arch.migration_state)
786                 return 0;
787
788         slots = kvm_memslots(kvm);
789         if (!slots || !slots->used_slots)
790                 return -EINVAL;
791
792         mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
793         if (!mgs)
794                 return -ENOMEM;
795         kvm->arch.migration_state = mgs;
796
797         if (kvm->arch.use_cmma) {
798                 /*
799                  * Get the last slot. They should be sorted by base_gfn, so the
800                  * last slot is also the one at the end of the address space.
801                  * We have verified above that at least one slot is present.
802                  */
803                 ms = slots->memslots + slots->used_slots - 1;
804                 /* round up so we only use full longs */
805                 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
806                 /* allocate enough bytes to store all the bits */
807                 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
808                 if (!mgs->pgste_bitmap) {
809                         kfree(mgs);
810                         kvm->arch.migration_state = NULL;
811                         return -ENOMEM;
812                 }
813
814                 mgs->bitmap_size = ram_pages;
815                 atomic64_set(&mgs->dirty_pages, ram_pages);
816                 /* mark all the pages in active slots as dirty */
817                 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
818                         ms = slots->memslots + slotnr;
819                         bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
820                 }
821
822                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
823         }
824         return 0;
825 }
826
827 /*
828  * Must be called with kvm->lock to avoid races with ourselves and
829  * kvm_s390_vm_start_migration.
830  */
831 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
832 {
833         struct kvm_s390_migration_state *mgs;
834
835         /* migration mode already disabled */
836         if (!kvm->arch.migration_state)
837                 return 0;
838         mgs = kvm->arch.migration_state;
839         kvm->arch.migration_state = NULL;
840
841         if (kvm->arch.use_cmma) {
842                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
843                 vfree(mgs->pgste_bitmap);
844         }
845         kfree(mgs);
846         return 0;
847 }
848
849 static int kvm_s390_vm_set_migration(struct kvm *kvm,
850                                      struct kvm_device_attr *attr)
851 {
852         int idx, res = -ENXIO;
853
854         mutex_lock(&kvm->lock);
855         switch (attr->attr) {
856         case KVM_S390_VM_MIGRATION_START:
857                 idx = srcu_read_lock(&kvm->srcu);
858                 res = kvm_s390_vm_start_migration(kvm);
859                 srcu_read_unlock(&kvm->srcu, idx);
860                 break;
861         case KVM_S390_VM_MIGRATION_STOP:
862                 res = kvm_s390_vm_stop_migration(kvm);
863                 break;
864         default:
865                 break;
866         }
867         mutex_unlock(&kvm->lock);
868
869         return res;
870 }
871
872 static int kvm_s390_vm_get_migration(struct kvm *kvm,
873                                      struct kvm_device_attr *attr)
874 {
875         u64 mig = (kvm->arch.migration_state != NULL);
876
877         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
878                 return -ENXIO;
879
880         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
881                 return -EFAULT;
882         return 0;
883 }
884
885 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
886 {
887         struct kvm_s390_vm_tod_clock gtod;
888
889         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
890                 return -EFAULT;
891
892         if (test_kvm_facility(kvm, 139))
893                 kvm_s390_set_tod_clock_ext(kvm, &gtod);
894         else if (gtod.epoch_idx == 0)
895                 kvm_s390_set_tod_clock(kvm, gtod.tod);
896         else
897                 return -EINVAL;
898
899         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
900                 gtod.epoch_idx, gtod.tod);
901
902         return 0;
903 }
904
905 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
906 {
907         u8 gtod_high;
908
909         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
910                                            sizeof(gtod_high)))
911                 return -EFAULT;
912
913         if (gtod_high != 0)
914                 return -EINVAL;
915         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
916
917         return 0;
918 }
919
920 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
921 {
922         u64 gtod;
923
924         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
925                 return -EFAULT;
926
927         kvm_s390_set_tod_clock(kvm, gtod);
928         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
929         return 0;
930 }
931
932 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
933 {
934         int ret;
935
936         if (attr->flags)
937                 return -EINVAL;
938
939         switch (attr->attr) {
940         case KVM_S390_VM_TOD_EXT:
941                 ret = kvm_s390_set_tod_ext(kvm, attr);
942                 break;
943         case KVM_S390_VM_TOD_HIGH:
944                 ret = kvm_s390_set_tod_high(kvm, attr);
945                 break;
946         case KVM_S390_VM_TOD_LOW:
947                 ret = kvm_s390_set_tod_low(kvm, attr);
948                 break;
949         default:
950                 ret = -ENXIO;
951                 break;
952         }
953         return ret;
954 }
955
956 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
957                                         struct kvm_s390_vm_tod_clock *gtod)
958 {
959         struct kvm_s390_tod_clock_ext htod;
960
961         preempt_disable();
962
963         get_tod_clock_ext((char *)&htod);
964
965         gtod->tod = htod.tod + kvm->arch.epoch;
966         gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
967
968         if (gtod->tod < htod.tod)
969                 gtod->epoch_idx += 1;
970
971         preempt_enable();
972 }
973
974 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
975 {
976         struct kvm_s390_vm_tod_clock gtod;
977
978         memset(&gtod, 0, sizeof(gtod));
979
980         if (test_kvm_facility(kvm, 139))
981                 kvm_s390_get_tod_clock_ext(kvm, &gtod);
982         else
983                 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
984
985         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
986                 return -EFAULT;
987
988         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
989                 gtod.epoch_idx, gtod.tod);
990         return 0;
991 }
992
993 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
994 {
995         u8 gtod_high = 0;
996
997         if (copy_to_user((void __user *)attr->addr, &gtod_high,
998                                          sizeof(gtod_high)))
999                 return -EFAULT;
1000         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1001
1002         return 0;
1003 }
1004
1005 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1006 {
1007         u64 gtod;
1008
1009         gtod = kvm_s390_get_tod_clock_fast(kvm);
1010         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1011                 return -EFAULT;
1012         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1013
1014         return 0;
1015 }
1016
1017 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1018 {
1019         int ret;
1020
1021         if (attr->flags)
1022                 return -EINVAL;
1023
1024         switch (attr->attr) {
1025         case KVM_S390_VM_TOD_EXT:
1026                 ret = kvm_s390_get_tod_ext(kvm, attr);
1027                 break;
1028         case KVM_S390_VM_TOD_HIGH:
1029                 ret = kvm_s390_get_tod_high(kvm, attr);
1030                 break;
1031         case KVM_S390_VM_TOD_LOW:
1032                 ret = kvm_s390_get_tod_low(kvm, attr);
1033                 break;
1034         default:
1035                 ret = -ENXIO;
1036                 break;
1037         }
1038         return ret;
1039 }
1040
1041 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1042 {
1043         struct kvm_s390_vm_cpu_processor *proc;
1044         u16 lowest_ibc, unblocked_ibc;
1045         int ret = 0;
1046
1047         mutex_lock(&kvm->lock);
1048         if (kvm->created_vcpus) {
1049                 ret = -EBUSY;
1050                 goto out;
1051         }
1052         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1053         if (!proc) {
1054                 ret = -ENOMEM;
1055                 goto out;
1056         }
1057         if (!copy_from_user(proc, (void __user *)attr->addr,
1058                             sizeof(*proc))) {
1059                 kvm->arch.model.cpuid = proc->cpuid;
1060                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1061                 unblocked_ibc = sclp.ibc & 0xfff;
1062                 if (lowest_ibc && proc->ibc) {
1063                         if (proc->ibc > unblocked_ibc)
1064                                 kvm->arch.model.ibc = unblocked_ibc;
1065                         else if (proc->ibc < lowest_ibc)
1066                                 kvm->arch.model.ibc = lowest_ibc;
1067                         else
1068                                 kvm->arch.model.ibc = proc->ibc;
1069                 }
1070                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1071                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1072                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1073                          kvm->arch.model.ibc,
1074                          kvm->arch.model.cpuid);
1075                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1076                          kvm->arch.model.fac_list[0],
1077                          kvm->arch.model.fac_list[1],
1078                          kvm->arch.model.fac_list[2]);
1079         } else
1080                 ret = -EFAULT;
1081         kfree(proc);
1082 out:
1083         mutex_unlock(&kvm->lock);
1084         return ret;
1085 }
1086
1087 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1088                                        struct kvm_device_attr *attr)
1089 {
1090         struct kvm_s390_vm_cpu_feat data;
1091         int ret = -EBUSY;
1092
1093         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1094                 return -EFAULT;
1095         if (!bitmap_subset((unsigned long *) data.feat,
1096                            kvm_s390_available_cpu_feat,
1097                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1098                 return -EINVAL;
1099
1100         mutex_lock(&kvm->lock);
1101         if (!atomic_read(&kvm->online_vcpus)) {
1102                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1103                             KVM_S390_VM_CPU_FEAT_NR_BITS);
1104                 ret = 0;
1105         }
1106         mutex_unlock(&kvm->lock);
1107         return ret;
1108 }
1109
1110 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1111                                           struct kvm_device_attr *attr)
1112 {
1113         /*
1114          * Once supported by kernel + hw, we have to store the subfunctions
1115          * in kvm->arch and remember that user space configured them.
1116          */
1117         return -ENXIO;
1118 }
1119
1120 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1121 {
1122         int ret = -ENXIO;
1123
1124         switch (attr->attr) {
1125         case KVM_S390_VM_CPU_PROCESSOR:
1126                 ret = kvm_s390_set_processor(kvm, attr);
1127                 break;
1128         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1129                 ret = kvm_s390_set_processor_feat(kvm, attr);
1130                 break;
1131         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1132                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1133                 break;
1134         }
1135         return ret;
1136 }
1137
1138 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140         struct kvm_s390_vm_cpu_processor *proc;
1141         int ret = 0;
1142
1143         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1144         if (!proc) {
1145                 ret = -ENOMEM;
1146                 goto out;
1147         }
1148         proc->cpuid = kvm->arch.model.cpuid;
1149         proc->ibc = kvm->arch.model.ibc;
1150         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1151                S390_ARCH_FAC_LIST_SIZE_BYTE);
1152         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1153                  kvm->arch.model.ibc,
1154                  kvm->arch.model.cpuid);
1155         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1156                  kvm->arch.model.fac_list[0],
1157                  kvm->arch.model.fac_list[1],
1158                  kvm->arch.model.fac_list[2]);
1159         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1160                 ret = -EFAULT;
1161         kfree(proc);
1162 out:
1163         return ret;
1164 }
1165
1166 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168         struct kvm_s390_vm_cpu_machine *mach;
1169         int ret = 0;
1170
1171         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1172         if (!mach) {
1173                 ret = -ENOMEM;
1174                 goto out;
1175         }
1176         get_cpu_id((struct cpuid *) &mach->cpuid);
1177         mach->ibc = sclp.ibc;
1178         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1179                S390_ARCH_FAC_LIST_SIZE_BYTE);
1180         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1181                sizeof(S390_lowcore.stfle_fac_list));
1182         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1183                  kvm->arch.model.ibc,
1184                  kvm->arch.model.cpuid);
1185         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1186                  mach->fac_mask[0],
1187                  mach->fac_mask[1],
1188                  mach->fac_mask[2]);
1189         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1190                  mach->fac_list[0],
1191                  mach->fac_list[1],
1192                  mach->fac_list[2]);
1193         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1194                 ret = -EFAULT;
1195         kfree(mach);
1196 out:
1197         return ret;
1198 }
1199
1200 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1201                                        struct kvm_device_attr *attr)
1202 {
1203         struct kvm_s390_vm_cpu_feat data;
1204
1205         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1206                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1207         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1208                 return -EFAULT;
1209         return 0;
1210 }
1211
1212 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1213                                      struct kvm_device_attr *attr)
1214 {
1215         struct kvm_s390_vm_cpu_feat data;
1216
1217         bitmap_copy((unsigned long *) data.feat,
1218                     kvm_s390_available_cpu_feat,
1219                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1220         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1221                 return -EFAULT;
1222         return 0;
1223 }
1224
1225 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1226                                           struct kvm_device_attr *attr)
1227 {
1228         /*
1229          * Once we can actually configure subfunctions (kernel + hw support),
1230          * we have to check if they were already set by user space, if so copy
1231          * them from kvm->arch.
1232          */
1233         return -ENXIO;
1234 }
1235
1236 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1237                                         struct kvm_device_attr *attr)
1238 {
1239         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1240             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1241                 return -EFAULT;
1242         return 0;
1243 }
1244 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1245 {
1246         int ret = -ENXIO;
1247
1248         switch (attr->attr) {
1249         case KVM_S390_VM_CPU_PROCESSOR:
1250                 ret = kvm_s390_get_processor(kvm, attr);
1251                 break;
1252         case KVM_S390_VM_CPU_MACHINE:
1253                 ret = kvm_s390_get_machine(kvm, attr);
1254                 break;
1255         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1256                 ret = kvm_s390_get_processor_feat(kvm, attr);
1257                 break;
1258         case KVM_S390_VM_CPU_MACHINE_FEAT:
1259                 ret = kvm_s390_get_machine_feat(kvm, attr);
1260                 break;
1261         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1262                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1263                 break;
1264         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1265                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1266                 break;
1267         }
1268         return ret;
1269 }
1270
1271 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1272 {
1273         int ret;
1274
1275         switch (attr->group) {
1276         case KVM_S390_VM_MEM_CTRL:
1277                 ret = kvm_s390_set_mem_control(kvm, attr);
1278                 break;
1279         case KVM_S390_VM_TOD:
1280                 ret = kvm_s390_set_tod(kvm, attr);
1281                 break;
1282         case KVM_S390_VM_CPU_MODEL:
1283                 ret = kvm_s390_set_cpu_model(kvm, attr);
1284                 break;
1285         case KVM_S390_VM_CRYPTO:
1286                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1287                 break;
1288         case KVM_S390_VM_MIGRATION:
1289                 ret = kvm_s390_vm_set_migration(kvm, attr);
1290                 break;
1291         default:
1292                 ret = -ENXIO;
1293                 break;
1294         }
1295
1296         return ret;
1297 }
1298
1299 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1300 {
1301         int ret;
1302
1303         switch (attr->group) {
1304         case KVM_S390_VM_MEM_CTRL:
1305                 ret = kvm_s390_get_mem_control(kvm, attr);
1306                 break;
1307         case KVM_S390_VM_TOD:
1308                 ret = kvm_s390_get_tod(kvm, attr);
1309                 break;
1310         case KVM_S390_VM_CPU_MODEL:
1311                 ret = kvm_s390_get_cpu_model(kvm, attr);
1312                 break;
1313         case KVM_S390_VM_MIGRATION:
1314                 ret = kvm_s390_vm_get_migration(kvm, attr);
1315                 break;
1316         default:
1317                 ret = -ENXIO;
1318                 break;
1319         }
1320
1321         return ret;
1322 }
1323
1324 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1325 {
1326         int ret;
1327
1328         switch (attr->group) {
1329         case KVM_S390_VM_MEM_CTRL:
1330                 switch (attr->attr) {
1331                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1332                 case KVM_S390_VM_MEM_CLR_CMMA:
1333                         ret = sclp.has_cmma ? 0 : -ENXIO;
1334                         break;
1335                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1336                         ret = 0;
1337                         break;
1338                 default:
1339                         ret = -ENXIO;
1340                         break;
1341                 }
1342                 break;
1343         case KVM_S390_VM_TOD:
1344                 switch (attr->attr) {
1345                 case KVM_S390_VM_TOD_LOW:
1346                 case KVM_S390_VM_TOD_HIGH:
1347                         ret = 0;
1348                         break;
1349                 default:
1350                         ret = -ENXIO;
1351                         break;
1352                 }
1353                 break;
1354         case KVM_S390_VM_CPU_MODEL:
1355                 switch (attr->attr) {
1356                 case KVM_S390_VM_CPU_PROCESSOR:
1357                 case KVM_S390_VM_CPU_MACHINE:
1358                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1359                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1360                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1361                         ret = 0;
1362                         break;
1363                 /* configuring subfunctions is not supported yet */
1364                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1365                 default:
1366                         ret = -ENXIO;
1367                         break;
1368                 }
1369                 break;
1370         case KVM_S390_VM_CRYPTO:
1371                 switch (attr->attr) {
1372                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1373                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1374                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1375                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1376                         ret = 0;
1377                         break;
1378                 default:
1379                         ret = -ENXIO;
1380                         break;
1381                 }
1382                 break;
1383         case KVM_S390_VM_MIGRATION:
1384                 ret = 0;
1385                 break;
1386         default:
1387                 ret = -ENXIO;
1388                 break;
1389         }
1390
1391         return ret;
1392 }
1393
1394 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1395 {
1396         uint8_t *keys;
1397         uint64_t hva;
1398         int srcu_idx, i, r = 0;
1399
1400         if (args->flags != 0)
1401                 return -EINVAL;
1402
1403         /* Is this guest using storage keys? */
1404         if (!mm_use_skey(current->mm))
1405                 return KVM_S390_GET_SKEYS_NONE;
1406
1407         /* Enforce sane limit on memory allocation */
1408         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1409                 return -EINVAL;
1410
1411         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1412         if (!keys)
1413                 return -ENOMEM;
1414
1415         down_read(&current->mm->mmap_sem);
1416         srcu_idx = srcu_read_lock(&kvm->srcu);
1417         for (i = 0; i < args->count; i++) {
1418                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1419                 if (kvm_is_error_hva(hva)) {
1420                         r = -EFAULT;
1421                         break;
1422                 }
1423
1424                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1425                 if (r)
1426                         break;
1427         }
1428         srcu_read_unlock(&kvm->srcu, srcu_idx);
1429         up_read(&current->mm->mmap_sem);
1430
1431         if (!r) {
1432                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1433                                  sizeof(uint8_t) * args->count);
1434                 if (r)
1435                         r = -EFAULT;
1436         }
1437
1438         kvfree(keys);
1439         return r;
1440 }
1441
1442 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1443 {
1444         uint8_t *keys;
1445         uint64_t hva;
1446         int srcu_idx, i, r = 0;
1447
1448         if (args->flags != 0)
1449                 return -EINVAL;
1450
1451         /* Enforce sane limit on memory allocation */
1452         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1453                 return -EINVAL;
1454
1455         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1456         if (!keys)
1457                 return -ENOMEM;
1458
1459         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1460                            sizeof(uint8_t) * args->count);
1461         if (r) {
1462                 r = -EFAULT;
1463                 goto out;
1464         }
1465
1466         /* Enable storage key handling for the guest */
1467         r = s390_enable_skey();
1468         if (r)
1469                 goto out;
1470
1471         down_read(&current->mm->mmap_sem);
1472         srcu_idx = srcu_read_lock(&kvm->srcu);
1473         for (i = 0; i < args->count; i++) {
1474                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1475                 if (kvm_is_error_hva(hva)) {
1476                         r = -EFAULT;
1477                         break;
1478                 }
1479
1480                 /* Lowest order bit is reserved */
1481                 if (keys[i] & 0x01) {
1482                         r = -EINVAL;
1483                         break;
1484                 }
1485
1486                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1487                 if (r)
1488                         break;
1489         }
1490         srcu_read_unlock(&kvm->srcu, srcu_idx);
1491         up_read(&current->mm->mmap_sem);
1492 out:
1493         kvfree(keys);
1494         return r;
1495 }
1496
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 *
 * The value is compared against a distance counted in pages (bitmap bits):
 * kvm_s390_get_cmma_bits() ends a block once the next dirty bit is more than
 * this many positions ahead of the current one.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/*
 * Upper limit for the number of CMMA values transferred by one call; the
 * same limit as for storage keys is used, for consistency.
 */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1505
1506 /*
1507  * This function searches for the next page with dirty CMMA attributes, and
1508  * saves the attributes in the buffer up to either the end of the buffer or
1509  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1510  * no trailing clean bytes are saved.
1511  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1512  * output buffer will indicate 0 as length.
1513  */
1514 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1515                                   struct kvm_s390_cmma_log *args)
1516 {
1517         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1518         unsigned long bufsize, hva, pgstev, i, next, cur;
1519         int srcu_idx, peek, r = 0, rr;
1520         u8 *res;
1521
1522         cur = args->start_gfn;
1523         i = next = pgstev = 0;
1524
1525         if (unlikely(!kvm->arch.use_cmma))
1526                 return -ENXIO;
1527         /* Invalid/unsupported flags were specified */
1528         if (args->flags & ~KVM_S390_CMMA_PEEK)
1529                 return -EINVAL;
1530         /* Migration mode query, and we are not doing a migration */
1531         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1532         if (!peek && !s)
1533                 return -EINVAL;
1534         /* CMMA is disabled or was not used, or the buffer has length zero */
1535         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1536         if (!bufsize || !kvm->mm->context.use_cmma) {
1537                 memset(args, 0, sizeof(*args));
1538                 return 0;
1539         }
1540
1541         if (!peek) {
1542                 /* We are not peeking, and there are no dirty pages */
1543                 if (!atomic64_read(&s->dirty_pages)) {
1544                         memset(args, 0, sizeof(*args));
1545                         return 0;
1546                 }
1547                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1548                                     args->start_gfn);
1549                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1550                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1551                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1552                         memset(args, 0, sizeof(*args));
1553                         return 0;
1554                 }
1555                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1556         }
1557
1558         res = vmalloc(bufsize);
1559         if (!res)
1560                 return -ENOMEM;
1561
1562         args->start_gfn = cur;
1563
1564         down_read(&kvm->mm->mmap_sem);
1565         srcu_idx = srcu_read_lock(&kvm->srcu);
1566         while (i < bufsize) {
1567                 hva = gfn_to_hva(kvm, cur);
1568                 if (kvm_is_error_hva(hva)) {
1569                         r = -EFAULT;
1570                         break;
1571                 }
1572                 /* decrement only if we actually flipped the bit to 0 */
1573                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1574                         atomic64_dec(&s->dirty_pages);
1575                 r = get_pgste(kvm->mm, hva, &pgstev);
1576                 if (r < 0)
1577                         pgstev = 0;
1578                 /* save the value */
1579                 res[i++] = (pgstev >> 24) & 0x43;
1580                 /*
1581                  * if the next bit is too far away, stop.
1582                  * if we reached the previous "next", find the next one
1583                  */
1584                 if (!peek) {
1585                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1586                                 break;
1587                         if (cur == next)
1588                                 next = find_next_bit(s->pgste_bitmap,
1589                                                      s->bitmap_size, cur + 1);
1590                 /* reached the end of the bitmap or of the buffer, stop */
1591                         if ((next >= s->bitmap_size) ||
1592                             (next >= args->start_gfn + bufsize))
1593                                 break;
1594                 }
1595                 cur++;
1596         }
1597         srcu_read_unlock(&kvm->srcu, srcu_idx);
1598         up_read(&kvm->mm->mmap_sem);
1599         args->count = i;
1600         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1601
1602         rr = copy_to_user((void __user *)args->values, res, args->count);
1603         if (rr)
1604                 r = -EFAULT;
1605
1606         vfree(res);
1607         return r;
1608 }
1609
1610 /*
1611  * This function sets the CMMA attributes for the given pages. If the input
1612  * buffer has zero length, no action is taken, otherwise the attributes are
1613  * set and the mm->context.use_cmma flag is set.
1614  */
1615 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1616                                   const struct kvm_s390_cmma_log *args)
1617 {
1618         unsigned long hva, mask, pgstev, i;
1619         uint8_t *bits;
1620         int srcu_idx, r = 0;
1621
1622         mask = args->mask;
1623
1624         if (!kvm->arch.use_cmma)
1625                 return -ENXIO;
1626         /* invalid/unsupported flags */
1627         if (args->flags != 0)
1628                 return -EINVAL;
1629         /* Enforce sane limit on memory allocation */
1630         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1631                 return -EINVAL;
1632         /* Nothing to do */
1633         if (args->count == 0)
1634                 return 0;
1635
1636         bits = vmalloc(sizeof(*bits) * args->count);
1637         if (!bits)
1638                 return -ENOMEM;
1639
1640         r = copy_from_user(bits, (void __user *)args->values, args->count);
1641         if (r) {
1642                 r = -EFAULT;
1643                 goto out;
1644         }
1645
1646         down_read(&kvm->mm->mmap_sem);
1647         srcu_idx = srcu_read_lock(&kvm->srcu);
1648         for (i = 0; i < args->count; i++) {
1649                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1650                 if (kvm_is_error_hva(hva)) {
1651                         r = -EFAULT;
1652                         break;
1653                 }
1654
1655                 pgstev = bits[i];
1656                 pgstev = pgstev << 24;
1657                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1658                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1659         }
1660         srcu_read_unlock(&kvm->srcu, srcu_idx);
1661         up_read(&kvm->mm->mmap_sem);
1662
1663         if (!kvm->mm->context.use_cmma) {
1664                 down_write(&kvm->mm->mmap_sem);
1665                 kvm->mm->context.use_cmma = 1;
1666                 up_write(&kvm->mm->mmap_sem);
1667         }
1668 out:
1669         vfree(bits);
1670         return r;
1671 }
1672
1673 long kvm_arch_vm_ioctl(struct file *filp,
1674                        unsigned int ioctl, unsigned long arg)
1675 {
1676         struct kvm *kvm = filp->private_data;
1677         void __user *argp = (void __user *)arg;
1678         struct kvm_device_attr attr;
1679         int r;
1680
1681         switch (ioctl) {
1682         case KVM_S390_INTERRUPT: {
1683                 struct kvm_s390_interrupt s390int;
1684
1685                 r = -EFAULT;
1686                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1687                         break;
1688                 r = kvm_s390_inject_vm(kvm, &s390int);
1689                 break;
1690         }
1691         case KVM_ENABLE_CAP: {
1692                 struct kvm_enable_cap cap;
1693                 r = -EFAULT;
1694                 if (copy_from_user(&cap, argp, sizeof(cap)))
1695                         break;
1696                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1697                 break;
1698         }
1699         case KVM_CREATE_IRQCHIP: {
1700                 struct kvm_irq_routing_entry routing;
1701
1702                 r = -EINVAL;
1703                 if (kvm->arch.use_irqchip) {
1704                         /* Set up dummy routing. */
1705                         memset(&routing, 0, sizeof(routing));
1706                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1707                 }
1708                 break;
1709         }
1710         case KVM_SET_DEVICE_ATTR: {
1711                 r = -EFAULT;
1712                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1713                         break;
1714                 r = kvm_s390_vm_set_attr(kvm, &attr);
1715                 break;
1716         }
1717         case KVM_GET_DEVICE_ATTR: {
1718                 r = -EFAULT;
1719                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1720                         break;
1721                 r = kvm_s390_vm_get_attr(kvm, &attr);
1722                 break;
1723         }
1724         case KVM_HAS_DEVICE_ATTR: {
1725                 r = -EFAULT;
1726                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1727                         break;
1728                 r = kvm_s390_vm_has_attr(kvm, &attr);
1729                 break;
1730         }
1731         case KVM_S390_GET_SKEYS: {
1732                 struct kvm_s390_skeys args;
1733
1734                 r = -EFAULT;
1735                 if (copy_from_user(&args, argp,
1736                                    sizeof(struct kvm_s390_skeys)))
1737                         break;
1738                 r = kvm_s390_get_skeys(kvm, &args);
1739                 break;
1740         }
1741         case KVM_S390_SET_SKEYS: {
1742                 struct kvm_s390_skeys args;
1743
1744                 r = -EFAULT;
1745                 if (copy_from_user(&args, argp,
1746                                    sizeof(struct kvm_s390_skeys)))
1747                         break;
1748                 r = kvm_s390_set_skeys(kvm, &args);
1749                 break;
1750         }
1751         case KVM_S390_GET_CMMA_BITS: {
1752                 struct kvm_s390_cmma_log args;
1753
1754                 r = -EFAULT;
1755                 if (copy_from_user(&args, argp, sizeof(args)))
1756                         break;
1757                 r = kvm_s390_get_cmma_bits(kvm, &args);
1758                 if (!r) {
1759                         r = copy_to_user(argp, &args, sizeof(args));
1760                         if (r)
1761                                 r = -EFAULT;
1762                 }
1763                 break;
1764         }
1765         case KVM_S390_SET_CMMA_BITS: {
1766                 struct kvm_s390_cmma_log args;
1767
1768                 r = -EFAULT;
1769                 if (copy_from_user(&args, argp, sizeof(args)))
1770                         break;
1771                 r = kvm_s390_set_cmma_bits(kvm, &args);
1772                 break;
1773         }
1774         default:
1775                 r = -ENOTTY;
1776         }
1777
1778         return r;
1779 }
1780
1781 static int kvm_s390_query_ap_config(u8 *config)
1782 {
1783         u32 fcn_code = 0x04000000UL;
1784         u32 cc = 0;
1785
1786         memset(config, 0, 128);
1787         asm volatile(
1788                 "lgr 0,%1\n"
1789                 "lgr 2,%2\n"
1790                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1791                 "0: ipm %0\n"
1792                 "srl %0,28\n"
1793                 "1:\n"
1794                 EX_TABLE(0b, 1b)
1795                 : "+r" (cc)
1796                 : "r" (fcn_code), "r" (config)
1797                 : "cc", "0", "2", "memory"
1798         );
1799
1800         return cc;
1801 }
1802
1803 static int kvm_s390_apxa_installed(void)
1804 {
1805         u8 config[128];
1806         int cc;
1807
1808         if (test_facility(12)) {
1809                 cc = kvm_s390_query_ap_config(config);
1810
1811                 if (cc)
1812                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1813                 else
1814                         return config[0] & 0x40;
1815         }
1816
1817         return 0;
1818 }
1819
1820 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1821 {
1822         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1823
1824         if (kvm_s390_apxa_installed())
1825                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1826         else
1827                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1828 }
1829
1830 static u64 kvm_s390_get_initial_cpuid(void)
1831 {
1832         struct cpuid cpuid;
1833
1834         get_cpu_id(&cpuid);
1835         cpuid.version = 0xff;
1836         return *((u64 *) &cpuid);
1837 }
1838
1839 static void kvm_s390_crypto_init(struct kvm *kvm)
1840 {
1841         if (!test_kvm_facility(kvm, 76))
1842                 return;
1843
1844         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1845         kvm_s390_set_crycb_format(kvm);
1846
1847         /* Enable AES/DEA protected key functions by default */
1848         kvm->arch.crypto.aes_kw = 1;
1849         kvm->arch.crypto.dea_kw = 1;
1850         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1851                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1852         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1853                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1854 }
1855
1856 static void sca_dispose(struct kvm *kvm)
1857 {
1858         if (kvm->arch.use_esca)
1859                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1860         else
1861                 free_page((unsigned long)(kvm->arch.sca));
1862         kvm->arch.sca = NULL;
1863 }
1864
1865 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1866 {
1867         gfp_t alloc_flags = GFP_KERNEL;
1868         int i, rc;
1869         char debug_name[16];
1870         static unsigned long sca_offset;
1871
1872         rc = -EINVAL;
1873 #ifdef CONFIG_KVM_S390_UCONTROL
1874         if (type & ~KVM_VM_S390_UCONTROL)
1875                 goto out_err;
1876         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1877                 goto out_err;
1878 #else
1879         if (type)
1880                 goto out_err;
1881 #endif
1882
1883         rc = s390_enable_sie();
1884         if (rc)
1885                 goto out_err;
1886
1887         rc = -ENOMEM;
1888
1889         kvm->arch.use_esca = 0; /* start with basic SCA */
1890         if (!sclp.has_64bscao)
1891                 alloc_flags |= GFP_DMA;
1892         rwlock_init(&kvm->arch.sca_lock);
1893         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1894         if (!kvm->arch.sca)
1895                 goto out_err;
1896         spin_lock(&kvm_lock);
1897         sca_offset += 16;
1898         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1899                 sca_offset = 0;
1900         kvm->arch.sca = (struct bsca_block *)
1901                         ((char *) kvm->arch.sca + sca_offset);
1902         spin_unlock(&kvm_lock);
1903
1904         sprintf(debug_name, "kvm-%u", current->pid);
1905
1906         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1907         if (!kvm->arch.dbf)
1908                 goto out_err;
1909
1910         kvm->arch.sie_page2 =
1911              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1912         if (!kvm->arch.sie_page2)
1913                 goto out_err;
1914
1915         /* Populate the facility mask initially. */
1916         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1917                sizeof(S390_lowcore.stfle_fac_list));
1918         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1919                 if (i < kvm_s390_fac_list_mask_size())
1920                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1921                 else
1922                         kvm->arch.model.fac_mask[i] = 0UL;
1923         }
1924
1925         /* Populate the facility list initially. */
1926         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1927         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1928                S390_ARCH_FAC_LIST_SIZE_BYTE);
1929
1930         /* we are always in czam mode - even on pre z14 machines */
1931         set_kvm_facility(kvm->arch.model.fac_mask, 138);
1932         set_kvm_facility(kvm->arch.model.fac_list, 138);
1933         /* we emulate STHYI in kvm */
1934         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1935         set_kvm_facility(kvm->arch.model.fac_list, 74);
1936         if (MACHINE_HAS_TLB_GUEST) {
1937                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1938                 set_kvm_facility(kvm->arch.model.fac_list, 147);
1939         }
1940
1941         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1942         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1943
1944         kvm_s390_crypto_init(kvm);
1945
1946         mutex_init(&kvm->arch.float_int.ais_lock);
1947         kvm->arch.float_int.simm = 0;
1948         kvm->arch.float_int.nimm = 0;
1949         spin_lock_init(&kvm->arch.float_int.lock);
1950         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1951                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1952         init_waitqueue_head(&kvm->arch.ipte_wq);
1953         mutex_init(&kvm->arch.ipte_mutex);
1954
1955         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1956         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1957
1958         if (type & KVM_VM_S390_UCONTROL) {
1959                 kvm->arch.gmap = NULL;
1960                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1961         } else {
1962                 if (sclp.hamax == U64_MAX)
1963                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1964                 else
1965                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1966                                                     sclp.hamax + 1);
1967                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1968                 if (!kvm->arch.gmap)
1969                         goto out_err;
1970                 kvm->arch.gmap->private = kvm;
1971                 kvm->arch.gmap->pfault_enabled = 0;
1972         }
1973
1974         kvm->arch.css_support = 0;
1975         kvm->arch.use_irqchip = 0;
1976         kvm->arch.epoch = 0;
1977
1978         spin_lock_init(&kvm->arch.start_stop_lock);
1979         kvm_s390_vsie_init(kvm);
1980         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1981
1982         return 0;
1983 out_err:
1984         free_page((unsigned long)kvm->arch.sie_page2);
1985         debug_unregister(kvm->arch.dbf);
1986         sca_dispose(kvm);
1987         KVM_EVENT(3, "creation of vm failed: %d", rc);
1988         return rc;
1989 }
1990
/* This architecture code reports that it has no per-vcpu debugfs entries. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
1995
/* Nothing is created for @vcpu; always reports success (0). */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
2000
2001 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2002 {
2003         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2004         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2005         kvm_s390_clear_local_irqs(vcpu);
2006         kvm_clear_async_pf_completion_queue(vcpu);
2007         if (!kvm_is_ucontrol(vcpu->kvm))
2008                 sca_del_vcpu(vcpu);
2009
2010         if (kvm_is_ucontrol(vcpu->kvm))
2011                 gmap_remove(vcpu->arch.gmap);
2012
2013         if (vcpu->kvm->arch.use_cmma)
2014                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2015         free_page((unsigned long)(vcpu->arch.sie_block));
2016
2017         kvm_vcpu_uninit(vcpu);
2018         kmem_cache_free(kvm_vcpu_cache, vcpu);
2019 }
2020
2021 static void kvm_free_vcpus(struct kvm *kvm)
2022 {
2023         unsigned int i;
2024         struct kvm_vcpu *vcpu;
2025
2026         kvm_for_each_vcpu(i, vcpu, kvm)
2027                 kvm_arch_vcpu_destroy(vcpu);
2028
2029         mutex_lock(&kvm->lock);
2030         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2031                 kvm->vcpus[i] = NULL;
2032
2033         atomic_set(&kvm->online_vcpus, 0);
2034         mutex_unlock(&kvm->lock);
2035 }
2036
2037 void kvm_arch_destroy_vm(struct kvm *kvm)
2038 {
2039         kvm_free_vcpus(kvm);
2040         sca_dispose(kvm);
2041         debug_unregister(kvm->arch.dbf);
2042         free_page((unsigned long)kvm->arch.sie_page2);
2043         if (!kvm_is_ucontrol(kvm))
2044                 gmap_remove(kvm->arch.gmap);
2045         kvm_s390_destroy_adapters(kvm);
2046         kvm_s390_clear_float_irqs(kvm);
2047         kvm_s390_vsie_destroy(kvm);
2048         if (kvm->arch.migration_state) {
2049                 vfree(kvm->arch.migration_state->pgste_bitmap);
2050                 kfree(kvm->arch.migration_state);
2051         }
2052         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2053 }
2054
2055 /* Section: vcpu related */
2056 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2057 {
2058         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2059         if (!vcpu->arch.gmap)
2060                 return -ENOMEM;
2061         vcpu->arch.gmap->private = vcpu->kvm;
2062
2063         return 0;
2064 }
2065
2066 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2067 {
2068         if (!kvm_s390_use_sca_entries())
2069                 return;
2070         read_lock(&vcpu->kvm->arch.sca_lock);
2071         if (vcpu->kvm->arch.use_esca) {
2072                 struct esca_block *sca = vcpu->kvm->arch.sca;
2073
2074                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2075                 sca->cpu[vcpu->vcpu_id].sda = 0;
2076         } else {
2077                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2078
2079                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2080                 sca->cpu[vcpu->vcpu_id].sda = 0;
2081         }
2082         read_unlock(&vcpu->kvm->arch.sca_lock);
2083 }
2084
2085 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2086 {
2087         if (!kvm_s390_use_sca_entries()) {
2088                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2089
2090                 /* we still need the basic sca for the ipte control */
2091                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2092                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2093         }
2094         read_lock(&vcpu->kvm->arch.sca_lock);
2095         if (vcpu->kvm->arch.use_esca) {
2096                 struct esca_block *sca = vcpu->kvm->arch.sca;
2097
2098                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2099                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2100                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2101                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2102                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2103         } else {
2104                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2105
2106                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2107                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2108                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2109                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2110         }
2111         read_unlock(&vcpu->kvm->arch.sca_lock);
2112 }
2113
2114 /* Basic SCA to Extended SCA data copy routines */
2115 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2116 {
2117         d->sda = s->sda;
2118         d->sigp_ctrl.c = s->sigp_ctrl.c;
2119         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2120 }
2121
2122 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2123 {
2124         int i;
2125
2126         d->ipte_control = s->ipte_control;
2127         d->mcn[0] = s->mcn;
2128         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2129                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2130 }
2131
2132 static int sca_switch_to_extended(struct kvm *kvm)
2133 {
2134         struct bsca_block *old_sca = kvm->arch.sca;
2135         struct esca_block *new_sca;
2136         struct kvm_vcpu *vcpu;
2137         unsigned int vcpu_idx;
2138         u32 scaol, scaoh;
2139
2140         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2141         if (!new_sca)
2142                 return -ENOMEM;
2143
2144         scaoh = (u32)((u64)(new_sca) >> 32);
2145         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2146
2147         kvm_s390_vcpu_block_all(kvm);
2148         write_lock(&kvm->arch.sca_lock);
2149
2150         sca_copy_b_to_e(new_sca, old_sca);
2151
2152         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2153                 vcpu->arch.sie_block->scaoh = scaoh;
2154                 vcpu->arch.sie_block->scaol = scaol;
2155                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2156         }
2157         kvm->arch.sca = new_sca;
2158         kvm->arch.use_esca = 1;
2159
2160         write_unlock(&kvm->arch.sca_lock);
2161         kvm_s390_vcpu_unblock_all(kvm);
2162
2163         free_page((unsigned long)old_sca);
2164
2165         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2166                  old_sca, kvm->arch.sca);
2167         return 0;
2168 }
2169
2170 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2171 {
2172         int rc;
2173
2174         if (!kvm_s390_use_sca_entries()) {
2175                 if (id < KVM_MAX_VCPUS)
2176                         return true;
2177                 return false;
2178         }
2179         if (id < KVM_S390_BSCA_CPU_SLOTS)
2180                 return true;
2181         if (!sclp.has_esca || !sclp.has_64bscao)
2182                 return false;
2183
2184         mutex_lock(&kvm->lock);
2185         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2186         mutex_unlock(&kvm->lock);
2187
2188         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2189 }
2190
2191 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2192 {
2193         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2194         kvm_clear_async_pf_completion_queue(vcpu);
2195         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2196                                     KVM_SYNC_GPRS |
2197                                     KVM_SYNC_ACRS |
2198                                     KVM_SYNC_CRS |
2199                                     KVM_SYNC_ARCH0 |
2200                                     KVM_SYNC_PFAULT;
2201         kvm_s390_set_prefix(vcpu, 0);
2202         if (test_kvm_facility(vcpu->kvm, 64))
2203                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2204         if (test_kvm_facility(vcpu->kvm, 133))
2205                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2206         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2207          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2208          */
2209         if (MACHINE_HAS_VX)
2210                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2211         else
2212                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2213
2214         if (kvm_is_ucontrol(vcpu->kvm))
2215                 return __kvm_ucontrol_vcpu_init(vcpu);
2216
2217         return 0;
2218 }
2219
2220 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2221 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2222 {
2223         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2224         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2225         vcpu->arch.cputm_start = get_tod_clock_fast();
2226         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2227 }
2228
2229 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2230 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2231 {
2232         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2233         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2234         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2235         vcpu->arch.cputm_start = 0;
2236         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2237 }
2238
2239 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2240 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2241 {
2242         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2243         vcpu->arch.cputm_enabled = true;
2244         __start_cpu_timer_accounting(vcpu);
2245 }
2246
2247 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2248 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2249 {
2250         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2251         __stop_cpu_timer_accounting(vcpu);
2252         vcpu->arch.cputm_enabled = false;
2253 }
2254
/* Preemption safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2261
/* Preemption safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2268
2269 /* set the cpu timer - may only be called from the VCPU thread itself */
2270 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2271 {
2272         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2273         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2274         if (vcpu->arch.cputm_enabled)
2275                 vcpu->arch.cputm_start = get_tod_clock_fast();
2276         vcpu->arch.sie_block->cputm = cputm;
2277         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2278         preempt_enable();
2279 }
2280
2281 /* update and get the cpu timer - can also be called from other VCPU threads */
2282 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2283 {
2284         unsigned int seq;
2285         __u64 value;
2286
2287         if (unlikely(!vcpu->arch.cputm_enabled))
2288                 return vcpu->arch.sie_block->cputm;
2289
2290         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2291         do {
2292                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2293                 /*
2294                  * If the writer would ever execute a read in the critical
2295                  * section, e.g. in irq context, we have a deadlock.
2296                  */
2297                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2298                 value = vcpu->arch.sie_block->cputm;
2299                 /* if cputm_start is 0, accounting is being started/stopped */
2300                 if (likely(vcpu->arch.cputm_start))
2301                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2302         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2303         preempt_enable();
2304         return value;
2305 }
2306
2307 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2308 {
2309
2310         gmap_enable(vcpu->arch.enabled_gmap);
2311         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2312         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2313                 __start_cpu_timer_accounting(vcpu);
2314         vcpu->cpu = cpu;
2315 }
2316
2317 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2318 {
2319         vcpu->cpu = -1;
2320         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2321                 __stop_cpu_timer_accounting(vcpu);
2322         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2323         vcpu->arch.enabled_gmap = gmap_get_enabled();
2324         gmap_disable(vcpu->arch.enabled_gmap);
2325
2326 }
2327
2328 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2329 {
2330         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2331         vcpu->arch.sie_block->gpsw.mask = 0UL;
2332         vcpu->arch.sie_block->gpsw.addr = 0UL;
2333         kvm_s390_set_prefix(vcpu, 0);
2334         kvm_s390_set_cpu_timer(vcpu, 0);
2335         vcpu->arch.sie_block->ckc       = 0UL;
2336         vcpu->arch.sie_block->todpr     = 0;
2337         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2338         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2339         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2340         /* make sure the new fpc will be lazily loaded */
2341         save_fpu_regs();
2342         current->thread.fpu.fpc = 0;
2343         vcpu->arch.sie_block->gbea = 1;
2344         vcpu->arch.sie_block->pp = 0;
2345         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2346         kvm_clear_async_pf_completion_queue(vcpu);
2347         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2348                 kvm_s390_vcpu_stop(vcpu);
2349         kvm_s390_clear_local_irqs(vcpu);
2350 }
2351
2352 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2353 {
2354         mutex_lock(&vcpu->kvm->lock);
2355         preempt_disable();
2356         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2357         preempt_enable();
2358         mutex_unlock(&vcpu->kvm->lock);
2359         if (!kvm_is_ucontrol(vcpu->kvm)) {
2360                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2361                 sca_add_vcpu(vcpu);
2362         }
2363         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2364                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2365         /* make vcpu_load load the right gmap on the first trigger */
2366         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2367 }
2368
2369 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2370 {
2371         if (!test_kvm_facility(vcpu->kvm, 76))
2372                 return;
2373
2374         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2375
2376         if (vcpu->kvm->arch.crypto.aes_kw)
2377                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2378         if (vcpu->kvm->arch.crypto.dea_kw)
2379                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2380
2381         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2382 }
2383
2384 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2385 {
2386         free_page(vcpu->arch.sie_block->cbrlo);
2387         vcpu->arch.sie_block->cbrlo = 0;
2388 }
2389
2390 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2391 {
2392         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2393         if (!vcpu->arch.sie_block->cbrlo)
2394                 return -ENOMEM;
2395
2396         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2397         return 0;
2398 }
2399
2400 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2401 {
2402         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2403
2404         vcpu->arch.sie_block->ibc = model->ibc;
2405         if (test_kvm_facility(vcpu->kvm, 7))
2406                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2407 }
2408
2409 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2410 {
2411         int rc = 0;
2412
2413         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2414                                                     CPUSTAT_SM |
2415                                                     CPUSTAT_STOPPED);
2416
2417         if (test_kvm_facility(vcpu->kvm, 78))
2418                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2419         else if (test_kvm_facility(vcpu->kvm, 8))
2420                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2421
2422         kvm_s390_vcpu_setup_model(vcpu);
2423
2424         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2425         if (MACHINE_HAS_ESOP)
2426                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2427         if (test_kvm_facility(vcpu->kvm, 9))
2428                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2429         if (test_kvm_facility(vcpu->kvm, 73))
2430                 vcpu->arch.sie_block->ecb |= ECB_TE;
2431
2432         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2433                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2434         if (test_kvm_facility(vcpu->kvm, 130))
2435                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2436         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2437         if (sclp.has_cei)
2438                 vcpu->arch.sie_block->eca |= ECA_CEI;
2439         if (sclp.has_ib)
2440                 vcpu->arch.sie_block->eca |= ECA_IB;
2441         if (sclp.has_siif)
2442                 vcpu->arch.sie_block->eca |= ECA_SII;
2443         if (sclp.has_sigpif)
2444                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2445         if (test_kvm_facility(vcpu->kvm, 129)) {
2446                 vcpu->arch.sie_block->eca |= ECA_VX;
2447                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2448         }
2449         if (test_kvm_facility(vcpu->kvm, 139))
2450                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2451
2452         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2453                                         | SDNXC;
2454         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2455
2456         if (sclp.has_kss)
2457                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2458         else
2459                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2460
2461         if (vcpu->kvm->arch.use_cmma) {
2462                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2463                 if (rc)
2464                         return rc;
2465         }
2466         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2467         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2468
2469         kvm_s390_vcpu_crypto_setup(vcpu);
2470
2471         return rc;
2472 }
2473
2474 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2475                                       unsigned int id)
2476 {
2477         struct kvm_vcpu *vcpu;
2478         struct sie_page *sie_page;
2479         int rc = -EINVAL;
2480
2481         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2482                 goto out;
2483
2484         rc = -ENOMEM;
2485
2486         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2487         if (!vcpu)
2488                 goto out;
2489
2490         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2491         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2492         if (!sie_page)
2493                 goto out_free_cpu;
2494
2495         vcpu->arch.sie_block = &sie_page->sie_block;
2496         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2497
2498         /* the real guest size will always be smaller than msl */
2499         vcpu->arch.sie_block->mso = 0;
2500         vcpu->arch.sie_block->msl = sclp.hamax;
2501
2502         vcpu->arch.sie_block->icpua = id;
2503         spin_lock_init(&vcpu->arch.local_int.lock);
2504         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2505         vcpu->arch.local_int.wq = &vcpu->wq;
2506         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2507         seqcount_init(&vcpu->arch.cputm_seqcount);
2508
2509         rc = kvm_vcpu_init(vcpu, kvm, id);
2510         if (rc)
2511                 goto out_free_sie_block;
2512         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2513                  vcpu->arch.sie_block);
2514         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2515
2516         return vcpu;
2517 out_free_sie_block:
2518         free_page((unsigned long)(vcpu->arch.sie_block));
2519 out_free_cpu:
2520         kmem_cache_free(kvm_vcpu_cache, vcpu);
2521 out:
2522         return ERR_PTR(rc);
2523 }
2524
/* Report the result of kvm_s390_vcpu_has_irq() for this vcpu. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2529
2530 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2531 {
2532         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2533 }
2534
2535 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2536 {
2537         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2538         exit_sie(vcpu);
2539 }
2540
2541 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2542 {
2543         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2544 }
2545
2546 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2547 {
2548         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2549         exit_sie(vcpu);
2550 }
2551
2552 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2553 {
2554         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2555 }
2556
2557 /*
2558  * Kick a guest cpu out of SIE and wait until SIE is not running.
2559  * If the CPU is not running (e.g. waiting as idle) the function will
2560  * return immediately. */
2561 void exit_sie(struct kvm_vcpu *vcpu)
2562 {
2563         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2564         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2565                 cpu_relax();
2566 }
2567
/*
 * Kick a guest cpu out of SIE to process a request synchronously.
 *
 * The request @req is recorded on @vcpu first; only then is the vcpu
 * flagged and forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);

	kvm_s390_vcpu_request(vcpu);
}
2574
2575 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2576                               unsigned long end)
2577 {
2578         struct kvm *kvm = gmap->private;
2579         struct kvm_vcpu *vcpu;
2580         unsigned long prefix;
2581         int i;
2582
2583         if (gmap_is_shadow(gmap))
2584                 return;
2585         if (start >= 1UL << 31)
2586                 /* We are only interested in prefix pages */
2587                 return;
2588         kvm_for_each_vcpu(i, vcpu, kvm) {
2589                 /* match against both prefix pages */
2590                 prefix = kvm_s390_get_prefix(vcpu);
2591                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2592                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2593                                    start, end);
2594                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2595                 }
2596         }
2597 }
2598
/*
 * kvm common code refers to this, but never calls it, so reaching this
 * function is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
2605
2606 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2607                                            struct kvm_one_reg *reg)
2608 {
2609         int r = -EINVAL;
2610
2611         switch (reg->id) {
2612         case KVM_REG_S390_TODPR:
2613                 r = put_user(vcpu->arch.sie_block->todpr,
2614                              (u32 __user *)reg->addr);
2615                 break;
2616         case KVM_REG_S390_EPOCHDIFF:
2617                 r = put_user(vcpu->arch.sie_block->epoch,
2618                              (u64 __user *)reg->addr);
2619                 break;
2620         case KVM_REG_S390_CPU_TIMER:
2621                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2622                              (u64 __user *)reg->addr);
2623                 break;
2624         case KVM_REG_S390_CLOCK_COMP:
2625                 r = put_user(vcpu->arch.sie_block->ckc,
2626                              (u64 __user *)reg->addr);
2627                 break;
2628         case KVM_REG_S390_PFTOKEN:
2629                 r = put_user(vcpu->arch.pfault_token,
2630                              (u64 __user *)reg->addr);
2631                 break;
2632         case KVM_REG_S390_PFCOMPARE:
2633                 r = put_user(vcpu->arch.pfault_compare,
2634                              (u64 __user *)reg->addr);
2635                 break;
2636         case KVM_REG_S390_PFSELECT:
2637                 r = put_user(vcpu->arch.pfault_select,
2638                              (u64 __user *)reg->addr);
2639                 break;
2640         case KVM_REG_S390_PP:
2641                 r = put_user(vcpu->arch.sie_block->pp,
2642                              (u64 __user *)reg->addr);
2643                 break;
2644         case KVM_REG_S390_GBEA:
2645                 r = put_user(vcpu->arch.sie_block->gbea,
2646                              (u64 __user *)reg->addr);
2647                 break;
2648         default:
2649                 break;
2650         }
2651
2652         return r;
2653 }
2654
2655 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2656                                            struct kvm_one_reg *reg)
2657 {
2658         int r = -EINVAL;
2659         __u64 val;
2660
2661         switch (reg->id) {
2662         case KVM_REG_S390_TODPR:
2663                 r = get_user(vcpu->arch.sie_block->todpr,
2664                              (u32 __user *)reg->addr);
2665                 break;
2666         case KVM_REG_S390_EPOCHDIFF:
2667                 r = get_user(vcpu->arch.sie_block->epoch,
2668                              (u64 __user *)reg->addr);
2669                 break;
2670         case KVM_REG_S390_CPU_TIMER:
2671                 r = get_user(val, (u64 __user *)reg->addr);
2672                 if (!r)
2673                         kvm_s390_set_cpu_timer(vcpu, val);
2674                 break;
2675         case KVM_REG_S390_CLOCK_COMP:
2676                 r = get_user(vcpu->arch.sie_block->ckc,
2677                              (u64 __user *)reg->addr);
2678                 break;
2679         case KVM_REG_S390_PFTOKEN:
2680                 r = get_user(vcpu->arch.pfault_token,
2681                              (u64 __user *)reg->addr);
2682                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2683                         kvm_clear_async_pf_completion_queue(vcpu);
2684                 break;
2685         case KVM_REG_S390_PFCOMPARE:
2686                 r = get_user(vcpu->arch.pfault_compare,
2687                              (u64 __user *)reg->addr);
2688                 break;
2689         case KVM_REG_S390_PFSELECT:
2690                 r = get_user(vcpu->arch.pfault_select,
2691                              (u64 __user *)reg->addr);
2692                 break;
2693         case KVM_REG_S390_PP:
2694                 r = get_user(vcpu->arch.sie_block->pp,
2695                              (u64 __user *)reg->addr);
2696                 break;
2697         case KVM_REG_S390_GBEA:
2698                 r = get_user(vcpu->arch.sie_block->gbea,
2699                              (u64 __user *)reg->addr);
2700                 break;
2701         default:
2702                 break;
2703         }
2704
2705         return r;
2706 }
2707
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2713
2714 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2715 {
2716         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2717         return 0;
2718 }
2719
2720 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2721 {
2722         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2723         return 0;
2724 }
2725
2726 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2727                                   struct kvm_sregs *sregs)
2728 {
2729         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2730         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2731         return 0;
2732 }
2733
2734 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2735                                   struct kvm_sregs *sregs)
2736 {
2737         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2738         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2739         return 0;
2740 }
2741
2742 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2743 {
2744         if (test_fp_ctl(fpu->fpc))
2745                 return -EINVAL;
2746         vcpu->run->s.regs.fpc = fpu->fpc;
2747         if (MACHINE_HAS_VX)
2748                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2749                                  (freg_t *) fpu->fprs);
2750         else
2751                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2752         return 0;
2753 }
2754
2755 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2756 {
2757         /* make sure we have the latest values */
2758         save_fpu_regs();
2759         if (MACHINE_HAS_VX)
2760                 convert_vx_to_fp((freg_t *) fpu->fprs,
2761                                  (__vector128 *) vcpu->run->s.regs.vrs);
2762         else
2763                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2764         fpu->fpc = vcpu->run->s.regs.fpc;
2765         return 0;
2766 }
2767
2768 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2769 {
2770         int rc = 0;
2771
2772         if (!is_vcpu_stopped(vcpu))
2773                 rc = -EBUSY;
2774         else {
2775                 vcpu->run->psw_mask = psw.mask;
2776                 vcpu->run->psw_addr = psw.addr;
2777         }
2778         return rc;
2779 }
2780
2781 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2782                                   struct kvm_translation *tr)
2783 {
2784         return -EINVAL; /* not implemented yet */
2785 }
2786
2787 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2788                               KVM_GUESTDBG_USE_HW_BP | \
2789                               KVM_GUESTDBG_ENABLE)
2790
2791 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2792                                         struct kvm_guest_debug *dbg)
2793 {
2794         int rc = 0;
2795
2796         vcpu->guest_debug = 0;
2797         kvm_s390_clear_bp_data(vcpu);
2798
2799         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2800                 return -EINVAL;
2801         if (!sclp.has_gpere)
2802                 return -EINVAL;
2803
2804         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2805                 vcpu->guest_debug = dbg->control;
2806                 /* enforce guest PER */
2807                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2808
2809                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2810                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2811         } else {
2812                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2813                 vcpu->arch.guestdbg.last_bp = 0;
2814         }
2815
2816         if (rc) {
2817                 vcpu->guest_debug = 0;
2818                 kvm_s390_clear_bp_data(vcpu);
2819                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2820         }
2821
2822         return rc;
2823 }
2824
2825 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2826                                     struct kvm_mp_state *mp_state)
2827 {
2828         /* CHECK_STOP and LOAD are not supported yet */
2829         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2830                                        KVM_MP_STATE_OPERATING;
2831 }
2832
2833 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2834                                     struct kvm_mp_state *mp_state)
2835 {
2836         int rc = 0;
2837
2838         /* user space knows about this interface - let it control the state */
2839         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2840
2841         switch (mp_state->mp_state) {
2842         case KVM_MP_STATE_STOPPED:
2843                 kvm_s390_vcpu_stop(vcpu);
2844                 break;
2845         case KVM_MP_STATE_OPERATING:
2846                 kvm_s390_vcpu_start(vcpu);
2847                 break;
2848         case KVM_MP_STATE_LOAD:
2849         case KVM_MP_STATE_CHECK_STOP:
2850                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2851         default:
2852                 rc = -ENXIO;
2853         }
2854
2855         return rc;
2856 }
2857
2858 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2859 {
2860         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2861 }
2862
2863 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2864 {
2865 retry:
2866         kvm_s390_vcpu_request_handled(vcpu);
2867         if (!kvm_request_pending(vcpu))
2868                 return 0;
2869         /*
2870          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2871          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2872          * This ensures that the ipte instruction for this request has
2873          * already finished. We might race against a second unmapper that
2874          * wants to set the blocking bit. Lets just retry the request loop.
2875          */
2876         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2877                 int rc;
2878                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2879                                           kvm_s390_get_prefix(vcpu),
2880                                           PAGE_SIZE * 2, PROT_WRITE);
2881                 if (rc) {
2882                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2883                         return rc;
2884                 }
2885                 goto retry;
2886         }
2887
2888         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2889                 vcpu->arch.sie_block->ihcpu = 0xffff;
2890                 goto retry;
2891         }
2892
2893         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2894                 if (!ibs_enabled(vcpu)) {
2895                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2896                         atomic_or(CPUSTAT_IBS,
2897                                         &vcpu->arch.sie_block->cpuflags);
2898                 }
2899                 goto retry;
2900         }
2901
2902         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2903                 if (ibs_enabled(vcpu)) {
2904                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2905                         atomic_andnot(CPUSTAT_IBS,
2906                                           &vcpu->arch.sie_block->cpuflags);
2907                 }
2908                 goto retry;
2909         }
2910
2911         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2912                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2913                 goto retry;
2914         }
2915
2916         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2917                 /*
2918                  * Disable CMMA virtualization; we will emulate the ESSA
2919                  * instruction manually, in order to provide additional
2920                  * functionalities needed for live migration.
2921                  */
2922                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2923                 goto retry;
2924         }
2925
2926         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2927                 /*
2928                  * Re-enable CMMA virtualization if CMMA is available and
2929                  * was used.
2930                  */
2931                 if ((vcpu->kvm->arch.use_cmma) &&
2932                     (vcpu->kvm->mm->context.use_cmma))
2933                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2934                 goto retry;
2935         }
2936
2937         /* nothing to do, just clear the request */
2938         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2939
2940         return 0;
2941 }
2942
2943 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2944                                  const struct kvm_s390_vm_tod_clock *gtod)
2945 {
2946         struct kvm_vcpu *vcpu;
2947         struct kvm_s390_tod_clock_ext htod;
2948         int i;
2949
2950         mutex_lock(&kvm->lock);
2951         preempt_disable();
2952
2953         get_tod_clock_ext((char *)&htod);
2954
2955         kvm->arch.epoch = gtod->tod - htod.tod;
2956         kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2957
2958         if (kvm->arch.epoch > gtod->tod)
2959                 kvm->arch.epdx -= 1;
2960
2961         kvm_s390_vcpu_block_all(kvm);
2962         kvm_for_each_vcpu(i, vcpu, kvm) {
2963                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2964                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2965         }
2966
2967         kvm_s390_vcpu_unblock_all(kvm);
2968         preempt_enable();
2969         mutex_unlock(&kvm->lock);
2970 }
2971
2972 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2973 {
2974         struct kvm_vcpu *vcpu;
2975         int i;
2976
2977         mutex_lock(&kvm->lock);
2978         preempt_disable();
2979         kvm->arch.epoch = tod - get_tod_clock();
2980         kvm_s390_vcpu_block_all(kvm);
2981         kvm_for_each_vcpu(i, vcpu, kvm)
2982                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2983         kvm_s390_vcpu_unblock_all(kvm);
2984         preempt_enable();
2985         mutex_unlock(&kvm->lock);
2986 }
2987
2988 /**
2989  * kvm_arch_fault_in_page - fault-in guest page if necessary
2990  * @vcpu: The corresponding virtual cpu
2991  * @gpa: Guest physical address
2992  * @writable: Whether the page should be writable or not
2993  *
2994  * Make sure that a guest page has been faulted-in on the host.
2995  *
2996  * Return: Zero on success, negative error code otherwise.
2997  */
2998 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2999 {
3000         return gmap_fault(vcpu->arch.gmap, gpa,
3001                           writable ? FAULT_FLAG_WRITE : 0);
3002 }
3003
3004 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3005                                       unsigned long token)
3006 {
3007         struct kvm_s390_interrupt inti;
3008         struct kvm_s390_irq irq;
3009
3010         if (start_token) {
3011                 irq.u.ext.ext_params2 = token;
3012                 irq.type = KVM_S390_INT_PFAULT_INIT;
3013                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3014         } else {
3015                 inti.type = KVM_S390_INT_PFAULT_DONE;
3016                 inti.parm64 = token;
3017                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3018         }
3019 }
3020
3021 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3022                                      struct kvm_async_pf *work)
3023 {
3024         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3025         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3026 }
3027
3028 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3029                                  struct kvm_async_pf *work)
3030 {
3031         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3032         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3033 }
3034
/*
 * Intentionally empty: s390 will always inject the page directly, so
 * there is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3040
3041 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3042 {
3043         /*
3044          * s390 will always inject the page directly,
3045          * but we still want check_async_completion to cleanup
3046          */
3047         return true;
3048 }
3049
3050 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3051 {
3052         hva_t hva;
3053         struct kvm_arch_async_pf arch;
3054         int rc;
3055
3056         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3057                 return 0;
3058         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3059             vcpu->arch.pfault_compare)
3060                 return 0;
3061         if (psw_extint_disabled(vcpu))
3062                 return 0;
3063         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3064                 return 0;
3065         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3066                 return 0;
3067         if (!vcpu->arch.gmap->pfault_enabled)
3068                 return 0;
3069
3070         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3071         hva += current->thread.gmap_addr & ~PAGE_MASK;
3072         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3073                 return 0;
3074
3075         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3076         return rc;
3077 }
3078
3079 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3080 {
3081         int rc, cpuflags;
3082
3083         /*
3084          * On s390 notifications for arriving pages will be delivered directly
3085          * to the guest but the house keeping for completed pfaults is
3086          * handled outside the worker.
3087          */
3088         kvm_check_async_pf_completion(vcpu);
3089
3090         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3091         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3092
3093         if (need_resched())
3094                 schedule();
3095
3096         if (test_cpu_flag(CIF_MCCK_PENDING))
3097                 s390_handle_mcck();
3098
3099         if (!kvm_is_ucontrol(vcpu->kvm)) {
3100                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3101                 if (rc)
3102                         return rc;
3103         }
3104
3105         rc = kvm_s390_handle_requests(vcpu);
3106         if (rc)
3107                 return rc;
3108
3109         if (guestdbg_enabled(vcpu)) {
3110                 kvm_s390_backup_guest_per_regs(vcpu);
3111                 kvm_s390_patch_guest_per_regs(vcpu);
3112         }
3113
3114         vcpu->arch.sie_block->icptcode = 0;
3115         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3116         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3117         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3118
3119         return 0;
3120 }
3121
3122 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3123 {
3124         struct kvm_s390_pgm_info pgm_info = {
3125                 .code = PGM_ADDRESSING,
3126         };
3127         u8 opcode, ilen;
3128         int rc;
3129
3130         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3131         trace_kvm_s390_sie_fault(vcpu);
3132
3133         /*
3134          * We want to inject an addressing exception, which is defined as a
3135          * suppressing or terminating exception. However, since we came here
3136          * by a DAT access exception, the PSW still points to the faulting
3137          * instruction since DAT exceptions are nullifying. So we've got
3138          * to look up the current opcode to get the length of the instruction
3139          * to be able to forward the PSW.
3140          */
3141         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3142         ilen = insn_length(opcode);
3143         if (rc < 0) {
3144                 return rc;
3145         } else if (rc) {
3146                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3147                  * Forward by arbitrary ilc, injection will take care of
3148                  * nullification if necessary.
3149                  */
3150                 pgm_info = vcpu->arch.pgm;
3151                 ilen = 4;
3152         }
3153         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3154         kvm_s390_forward_psw(vcpu, ilen);
3155         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3156 }
3157
3158 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3159 {
3160         struct mcck_volatile_info *mcck_info;
3161         struct sie_page *sie_page;
3162
3163         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3164                    vcpu->arch.sie_block->icptcode);
3165         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3166
3167         if (guestdbg_enabled(vcpu))
3168                 kvm_s390_restore_guest_per_regs(vcpu);
3169
3170         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3171         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3172
3173         if (exit_reason == -EINTR) {
3174                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3175                 sie_page = container_of(vcpu->arch.sie_block,
3176                                         struct sie_page, sie_block);
3177                 mcck_info = &sie_page->mcck_info;
3178                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3179                 return 0;
3180         }
3181
3182         if (vcpu->arch.sie_block->icptcode > 0) {
3183                 int rc = kvm_handle_sie_intercept(vcpu);
3184
3185                 if (rc != -EOPNOTSUPP)
3186                         return rc;
3187                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3188                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3189                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3190                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3191                 return -EREMOTE;
3192         } else if (exit_reason != -EFAULT) {
3193                 vcpu->stat.exit_null++;
3194                 return 0;
3195         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3196                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3197                 vcpu->run->s390_ucontrol.trans_exc_code =
3198                                                 current->thread.gmap_addr;
3199                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3200                 return -EREMOTE;
3201         } else if (current->thread.gmap_pfault) {
3202                 trace_kvm_s390_major_guest_pfault(vcpu);
3203                 current->thread.gmap_pfault = 0;
3204                 if (kvm_arch_setup_async_pf(vcpu))
3205                         return 0;
3206                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3207         }
3208         return vcpu_post_run_fault_in_sie(vcpu);
3209 }
3210
3211 static int __vcpu_run(struct kvm_vcpu *vcpu)
3212 {
3213         int rc, exit_reason;
3214
3215         /*
3216          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3217          * ning the guest), so that memslots (and other stuff) are protected
3218          */
3219         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3220
3221         do {
3222                 rc = vcpu_pre_run(vcpu);
3223                 if (rc)
3224                         break;
3225
3226                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3227                 /*
3228                  * As PF_VCPU will be used in fault handler, between
3229                  * guest_enter and guest_exit should be no uaccess.
3230                  */
3231                 local_irq_disable();
3232                 guest_enter_irqoff();
3233                 __disable_cpu_timer_accounting(vcpu);
3234                 local_irq_enable();
3235                 exit_reason = sie64a(vcpu->arch.sie_block,
3236                                      vcpu->run->s.regs.gprs);
3237                 local_irq_disable();
3238                 __enable_cpu_timer_accounting(vcpu);
3239                 guest_exit_irqoff();
3240                 local_irq_enable();
3241                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3242
3243                 rc = vcpu_post_run(vcpu, exit_reason);
3244         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3245
3246         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3247         return rc;
3248 }
3249
3250 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3251 {
3252         struct runtime_instr_cb *riccb;
3253         struct gs_cb *gscb;
3254
3255         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3256         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3257         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3258         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3259         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3260                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3261         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3262                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3263                 /* some control register changes require a tlb flush */
3264                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3265         }
3266         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3267                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3268                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3269                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3270                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3271                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3272         }
3273         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3274                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3275                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3276                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3277                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3278                         kvm_clear_async_pf_completion_queue(vcpu);
3279         }
3280         /*
3281          * If userspace sets the riccb (e.g. after migration) to a valid state,
3282          * we should enable RI here instead of doing the lazy enablement.
3283          */
3284         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3285             test_kvm_facility(vcpu->kvm, 64) &&
3286             riccb->v &&
3287             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3288                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3289                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3290         }
3291         /*
3292          * If userspace sets the gscb (e.g. after migration) to non-zero,
3293          * we should enable GS here instead of doing the lazy enablement.
3294          */
3295         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3296             test_kvm_facility(vcpu->kvm, 133) &&
3297             gscb->gssm &&
3298             !vcpu->arch.gs_enabled) {
3299                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3300                 vcpu->arch.sie_block->ecb |= ECB_GS;
3301                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3302                 vcpu->arch.gs_enabled = 1;
3303         }
3304         save_access_regs(vcpu->arch.host_acrs);
3305         restore_access_regs(vcpu->run->s.regs.acrs);
3306         /* save host (userspace) fprs/vrs */
3307         save_fpu_regs();
3308         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3309         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3310         if (MACHINE_HAS_VX)
3311                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3312         else
3313                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3314         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3315         if (test_fp_ctl(current->thread.fpu.fpc))
3316                 /* User space provided an invalid FPC, let's clear it */
3317                 current->thread.fpu.fpc = 0;
3318         if (MACHINE_HAS_GS) {
3319                 preempt_disable();
3320                 __ctl_set_bit(2, 4);
3321                 if (current->thread.gs_cb) {
3322                         vcpu->arch.host_gscb = current->thread.gs_cb;
3323                         save_gs_cb(vcpu->arch.host_gscb);
3324                 }
3325                 if (vcpu->arch.gs_enabled) {
3326                         current->thread.gs_cb = (struct gs_cb *)
3327                                                 &vcpu->run->s.regs.gscb;
3328                         restore_gs_cb(current->thread.gs_cb);
3329                 }
3330                 preempt_enable();
3331         }
3332
3333         kvm_run->kvm_dirty_regs = 0;
3334 }
3335
3336 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3337 {
3338         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3339         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3340         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3341         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3342         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3343         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3344         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3345         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3346         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3347         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3348         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3349         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3350         save_access_regs(vcpu->run->s.regs.acrs);
3351         restore_access_regs(vcpu->arch.host_acrs);
3352         /* Save guest register state */
3353         save_fpu_regs();
3354         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3355         /* Restore will be done lazily at return */
3356         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3357         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3358         if (MACHINE_HAS_GS) {
3359                 __ctl_set_bit(2, 4);
3360                 if (vcpu->arch.gs_enabled)
3361                         save_gs_cb(current->thread.gs_cb);
3362                 preempt_disable();
3363                 current->thread.gs_cb = vcpu->arch.host_gscb;
3364                 restore_gs_cb(vcpu->arch.host_gscb);
3365                 preempt_enable();
3366                 if (!vcpu->arch.host_gscb)
3367                         __ctl_clear_bit(2, 4);
3368                 vcpu->arch.host_gscb = NULL;
3369         }
3370
3371 }
3372
3373 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3374 {
3375         int rc;
3376         sigset_t sigsaved;
3377
3378         if (kvm_run->immediate_exit)
3379                 return -EINTR;
3380
3381         if (guestdbg_exit_pending(vcpu)) {
3382                 kvm_s390_prepare_debug_exit(vcpu);
3383                 return 0;
3384         }
3385
3386         if (vcpu->sigset_active)
3387                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3388
3389         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3390                 kvm_s390_vcpu_start(vcpu);
3391         } else if (is_vcpu_stopped(vcpu)) {
3392                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3393                                    vcpu->vcpu_id);
3394                 return -EINVAL;
3395         }
3396
3397         sync_regs(vcpu, kvm_run);
3398         enable_cpu_timer_accounting(vcpu);
3399
3400         might_fault();
3401         rc = __vcpu_run(vcpu);
3402
3403         if (signal_pending(current) && !rc) {
3404                 kvm_run->exit_reason = KVM_EXIT_INTR;
3405                 rc = -EINTR;
3406         }
3407
3408         if (guestdbg_exit_pending(vcpu) && !rc)  {
3409                 kvm_s390_prepare_debug_exit(vcpu);
3410                 rc = 0;
3411         }
3412
3413         if (rc == -EREMOTE) {
3414                 /* userspace support is needed, kvm_run has been prepared */
3415                 rc = 0;
3416         }
3417
3418         disable_cpu_timer_accounting(vcpu);
3419         store_regs(vcpu, kvm_run);
3420
3421         if (vcpu->sigset_active)
3422                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3423
3424         vcpu->stat.exit_userspace++;
3425         return rc;
3426 }
3427
3428 /*
3429  * store status at address
3430  * we use have two special cases:
3431  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3432  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3433  */
3434 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3435 {
3436         unsigned char archmode = 1;
3437         freg_t fprs[NUM_FPRS];
3438         unsigned int px;
3439         u64 clkcomp, cputm;
3440         int rc;
3441
3442         px = kvm_s390_get_prefix(vcpu);
3443         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3444                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3445                         return -EFAULT;
3446                 gpa = 0;
3447         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3448                 if (write_guest_real(vcpu, 163, &archmode, 1))
3449                         return -EFAULT;
3450                 gpa = px;
3451         } else
3452                 gpa -= __LC_FPREGS_SAVE_AREA;
3453
3454         /* manually convert vector registers if necessary */
3455         if (MACHINE_HAS_VX) {
3456                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3457                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3458                                      fprs, 128);
3459         } else {
3460                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3461                                      vcpu->run->s.regs.fprs, 128);
3462         }
3463         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3464                               vcpu->run->s.regs.gprs, 128);
3465         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3466                               &vcpu->arch.sie_block->gpsw, 16);
3467         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3468                               &px, 4);
3469         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3470                               &vcpu->run->s.regs.fpc, 4);
3471         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3472                               &vcpu->arch.sie_block->todpr, 4);
3473         cputm = kvm_s390_get_cpu_timer(vcpu);
3474         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3475                               &cputm, 8);
3476         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3477         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3478                               &clkcomp, 8);
3479         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3480                               &vcpu->run->s.regs.acrs, 64);
3481         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3482                               &vcpu->arch.sie_block->gcr, 128);
3483         return rc ? -EFAULT : 0;
3484 }
3485
3486 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3487 {
3488         /*
3489          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3490          * switch in the run ioctl. Let's update our copies before we save
3491          * it into the save area
3492          */
3493         save_fpu_regs();
3494         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3495         save_access_regs(vcpu->run->s.regs.acrs);
3496
3497         return kvm_s390_store_status_unloaded(vcpu, addr);
3498 }
3499
3500 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3501 {
3502         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3503         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3504 }
3505
3506 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3507 {
3508         unsigned int i;
3509         struct kvm_vcpu *vcpu;
3510
3511         kvm_for_each_vcpu(i, vcpu, kvm) {
3512                 __disable_ibs_on_vcpu(vcpu);
3513         }
3514 }
3515
3516 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3517 {
3518         if (!sclp.has_ibs)
3519                 return;
3520         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3521         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3522 }
3523
3524 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3525 {
3526         int i, online_vcpus, started_vcpus = 0;
3527
3528         if (!is_vcpu_stopped(vcpu))
3529                 return;
3530
3531         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3532         /* Only one cpu at a time may enter/leave the STOPPED state. */
3533         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3534         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3535
3536         for (i = 0; i < online_vcpus; i++) {
3537                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3538                         started_vcpus++;
3539         }
3540
3541         if (started_vcpus == 0) {
3542                 /* we're the only active VCPU -> speed it up */
3543                 __enable_ibs_on_vcpu(vcpu);
3544         } else if (started_vcpus == 1) {
3545                 /*
3546                  * As we are starting a second VCPU, we have to disable
3547                  * the IBS facility on all VCPUs to remove potentially
3548                  * oustanding ENABLE requests.
3549                  */
3550                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3551         }
3552
3553         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3554         /*
3555          * Another VCPU might have used IBS while we were offline.
3556          * Let's play safe and flush the VCPU at startup.
3557          */
3558         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3559         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3560         return;
3561 }
3562
3563 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3564 {
3565         int i, online_vcpus, started_vcpus = 0;
3566         struct kvm_vcpu *started_vcpu = NULL;
3567
3568         if (is_vcpu_stopped(vcpu))
3569                 return;
3570
3571         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3572         /* Only one cpu at a time may enter/leave the STOPPED state. */
3573         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3574         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3575
3576         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3577         kvm_s390_clear_stop_irq(vcpu);
3578
3579         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3580         __disable_ibs_on_vcpu(vcpu);
3581
3582         for (i = 0; i < online_vcpus; i++) {
3583                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3584                         started_vcpus++;
3585                         started_vcpu = vcpu->kvm->vcpus[i];
3586                 }
3587         }
3588
3589         if (started_vcpus == 1) {
3590                 /*
3591                  * As we only have one VCPU left, we want to enable the
3592                  * IBS facility for that VCPU to speed it up.
3593                  */
3594                 __enable_ibs_on_vcpu(started_vcpu);
3595         }
3596
3597         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3598         return;
3599 }
3600
3601 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3602                                      struct kvm_enable_cap *cap)
3603 {
3604         int r;
3605
3606         if (cap->flags)
3607                 return -EINVAL;
3608
3609         switch (cap->cap) {
3610         case KVM_CAP_S390_CSS_SUPPORT:
3611                 if (!vcpu->kvm->arch.css_support) {
3612                         vcpu->kvm->arch.css_support = 1;
3613                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3614                         trace_kvm_s390_enable_css(vcpu->kvm);
3615                 }
3616                 r = 0;
3617                 break;
3618         default:
3619                 r = -EINVAL;
3620                 break;
3621         }
3622         return r;
3623 }
3624
3625 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3626                                   struct kvm_s390_mem_op *mop)
3627 {
3628         void __user *uaddr = (void __user *)mop->buf;
3629         void *tmpbuf = NULL;
3630         int r, srcu_idx;
3631         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3632                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3633
3634         if (mop->flags & ~supported_flags)
3635                 return -EINVAL;
3636
3637         if (mop->size > MEM_OP_MAX_SIZE)
3638                 return -E2BIG;
3639
3640         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3641                 tmpbuf = vmalloc(mop->size);
3642                 if (!tmpbuf)
3643                         return -ENOMEM;
3644         }
3645
3646         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3647
3648         switch (mop->op) {
3649         case KVM_S390_MEMOP_LOGICAL_READ:
3650                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3651                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3652                                             mop->size, GACC_FETCH);
3653                         break;
3654                 }
3655                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3656                 if (r == 0) {
3657                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3658                                 r = -EFAULT;
3659                 }
3660                 break;
3661         case KVM_S390_MEMOP_LOGICAL_WRITE:
3662                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3663                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3664                                             mop->size, GACC_STORE);
3665                         break;
3666                 }
3667                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3668                         r = -EFAULT;
3669                         break;
3670                 }
3671                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3672                 break;
3673         default:
3674                 r = -EINVAL;
3675         }
3676
3677         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3678
3679         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3680                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3681
3682         vfree(tmpbuf);
3683         return r;
3684 }
3685
3686 long kvm_arch_vcpu_ioctl(struct file *filp,
3687                          unsigned int ioctl, unsigned long arg)
3688 {
3689         struct kvm_vcpu *vcpu = filp->private_data;
3690         void __user *argp = (void __user *)arg;
3691         int idx;
3692         long r;
3693
3694         switch (ioctl) {
3695         case KVM_S390_IRQ: {
3696                 struct kvm_s390_irq s390irq;
3697
3698                 r = -EFAULT;
3699                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3700                         break;
3701                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3702                 break;
3703         }
3704         case KVM_S390_INTERRUPT: {
3705                 struct kvm_s390_interrupt s390int;
3706                 struct kvm_s390_irq s390irq;
3707
3708                 r = -EFAULT;
3709                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3710                         break;
3711                 if (s390int_to_s390irq(&s390int, &s390irq))
3712                         return -EINVAL;
3713                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3714                 break;
3715         }
3716         case KVM_S390_STORE_STATUS:
3717                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3718                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3719                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3720                 break;
3721         case KVM_S390_SET_INITIAL_PSW: {
3722                 psw_t psw;
3723
3724                 r = -EFAULT;
3725                 if (copy_from_user(&psw, argp, sizeof(psw)))
3726                         break;
3727                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3728                 break;
3729         }
3730         case KVM_S390_INITIAL_RESET:
3731                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3732                 break;
3733         case KVM_SET_ONE_REG:
3734         case KVM_GET_ONE_REG: {
3735                 struct kvm_one_reg reg;
3736                 r = -EFAULT;
3737                 if (copy_from_user(&reg, argp, sizeof(reg)))
3738                         break;
3739                 if (ioctl == KVM_SET_ONE_REG)
3740                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3741                 else
3742                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3743                 break;
3744         }
3745 #ifdef CONFIG_KVM_S390_UCONTROL
3746         case KVM_S390_UCAS_MAP: {
3747                 struct kvm_s390_ucas_mapping ucasmap;
3748
3749                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3750                         r = -EFAULT;
3751                         break;
3752                 }
3753
3754                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3755                         r = -EINVAL;
3756                         break;
3757                 }
3758
3759                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3760                                      ucasmap.vcpu_addr, ucasmap.length);
3761                 break;
3762         }
3763         case KVM_S390_UCAS_UNMAP: {
3764                 struct kvm_s390_ucas_mapping ucasmap;
3765
3766                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3767                         r = -EFAULT;
3768                         break;
3769                 }
3770
3771                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3772                         r = -EINVAL;
3773                         break;
3774                 }
3775
3776                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3777                         ucasmap.length);
3778                 break;
3779         }
3780 #endif
3781         case KVM_S390_VCPU_FAULT: {
3782                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3783                 break;
3784         }
3785         case KVM_ENABLE_CAP:
3786         {
3787                 struct kvm_enable_cap cap;
3788                 r = -EFAULT;
3789                 if (copy_from_user(&cap, argp, sizeof(cap)))
3790                         break;
3791                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3792                 break;
3793         }
3794         case KVM_S390_MEM_OP: {
3795                 struct kvm_s390_mem_op mem_op;
3796
3797                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3798                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3799                 else
3800                         r = -EFAULT;
3801                 break;
3802         }
3803         case KVM_S390_SET_IRQ_STATE: {
3804                 struct kvm_s390_irq_state irq_state;
3805
3806                 r = -EFAULT;
3807                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3808                         break;
3809                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3810                     irq_state.len == 0 ||
3811                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3812                         r = -EINVAL;
3813                         break;
3814                 }
3815                 r = kvm_s390_set_irq_state(vcpu,
3816                                            (void __user *) irq_state.buf,
3817                                            irq_state.len);
3818                 break;
3819         }
3820         case KVM_S390_GET_IRQ_STATE: {
3821                 struct kvm_s390_irq_state irq_state;
3822
3823                 r = -EFAULT;
3824                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3825                         break;
3826                 if (irq_state.len == 0) {
3827                         r = -EINVAL;
3828                         break;
3829                 }
3830                 r = kvm_s390_get_irq_state(vcpu,
3831                                            (__u8 __user *)  irq_state.buf,
3832                                            irq_state.len);
3833                 break;
3834         }
3835         default:
3836                 r = -ENOTTY;
3837         }
3838         return r;
3839 }
3840
3841 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3842 {
3843 #ifdef CONFIG_KVM_S390_UCONTROL
3844         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3845                  && (kvm_is_ucontrol(vcpu->kvm))) {
3846                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3847                 get_page(vmf->page);
3848                 return 0;
3849         }
3850 #endif
3851         return VM_FAULT_SIGBUS;
3852 }
3853
/*
 * Arch hook for memslot creation.  Nothing is set up here; the arguments
 * are not used and 0 is always returned.
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	/* nothing to do */
	return 0;
}
3859
3860 /* Section: memory related */
3861 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3862                                    struct kvm_memory_slot *memslot,
3863                                    const struct kvm_userspace_memory_region *mem,
3864                                    enum kvm_mr_change change)
3865 {
3866         /* A few sanity checks. We can have memory slots which have to be
3867            located/ended at a segment boundary (1MB). The memory in userland is
3868            ok to be fragmented into various different vmas. It is okay to mmap()
3869            and munmap() stuff in this slot after doing this call at any time */
3870
3871         if (mem->userspace_addr & 0xffffful)
3872                 return -EINVAL;
3873
3874         if (mem->memory_size & 0xffffful)
3875                 return -EINVAL;
3876
3877         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3878                 return -EINVAL;
3879
3880         return 0;
3881 }
3882
3883 void kvm_arch_commit_memory_region(struct kvm *kvm,
3884                                 const struct kvm_userspace_memory_region *mem,
3885                                 const struct kvm_memory_slot *old,
3886                                 const struct kvm_memory_slot *new,
3887                                 enum kvm_mr_change change)
3888 {
3889         int rc;
3890
3891         /* If the basics of the memslot do not change, we do not want
3892          * to update the gmap. Every update causes several unnecessary
3893          * segment translation exceptions. This is usually handled just
3894          * fine by the normal fault handler + gmap, but it will also
3895          * cause faults on the prefix page of running guest CPUs.
3896          */
3897         if (old->userspace_addr == mem->userspace_addr &&
3898             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3899             old->npages * PAGE_SIZE == mem->memory_size)
3900                 return;
3901
3902         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3903                 mem->guest_phys_addr, mem->memory_size);
3904         if (rc)
3905                 pr_warn("failed to commit memory region\n");
3906         return;
3907 }
3908
3909 static inline unsigned long nonhyp_mask(int i)
3910 {
3911         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3912
3913         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3914 }
3915
3916 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3917 {
3918         vcpu->valid_wakeup = false;
3919 }
3920
3921 static int __init kvm_s390_init(void)
3922 {
3923         int i;
3924
3925         if (!sclp.has_sief2) {
3926                 pr_info("SIE not available\n");
3927                 return -ENODEV;
3928         }
3929
3930         for (i = 0; i < 16; i++)
3931                 kvm_s390_fac_list_mask[i] |=
3932                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3933
3934         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3935 }
3936
3937 static void __exit kvm_s390_exit(void)
3938 {
3939         kvm_exit();
3940 }
3941
/* Register kvm_s390_init()/kvm_s390_exit() as the module entry/exit points. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");