KVM: s390: interpretive execution of SIGP EXTERNAL CALL
arch/s390/kvm/kvm-s390.c
/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { NULL }
};

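/*
 * Facility bits offered to guests; we assume this list is allocated and
 * filled (masked from the host facility list) at module initialization.
 */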
unsigned long *vfacilities;
static struct gmap_notifier gmap_notifier;

/* test availability of vfacility */
int test_vfacility(unsigned long nr)
{
        return __test_facility(nr, (void *) vfacilities);
}

/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

void kvm_arch_hardware_disable(void *garbage)
{
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_ipte_notifier(&gmap_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_ipte_notifier(&gmap_notifier);
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
        return 0;
}

void kvm_arch_exit(void)
{
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_dev_ioctl_check_extension(long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_VM_ATTRIBUTES:
                r = 1;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        default:
                r = 0;
        }
        return r;
}
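
/*
 * Minimal userspace sketch (illustrative only, not part of this file):
 * the capabilities reported above are probed through the generic
 * KVM_CHECK_EXTENSION ioctl on /dev/kvm, e.g.:
 *
 *      int kvm_fd = open("/dev/kvm", O_RDWR);
 *      int n = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *
 * A return of 0 means the extension is unsupported; for
 * KVM_CAP_MAX_VCPUS the value returned is KVM_MAX_VCPUS.
 */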

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot)
{
        gfn_t cur_gfn, last_gfn;
        unsigned long address;
        struct gmap *gmap = kvm->arch.gmap;

        down_read(&gmap->mm->mmap_sem);
        /* Loop over all guest pages */
        last_gfn = memslot->base_gfn + memslot->npages;
        for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
                address = gfn_to_hva_memslot(memslot, cur_gfn);

                if (gmap_test_and_clear_dirty(address, gmap))
                        mark_page_dirty(kvm, cur_gfn);
        }
        up_read(&gmap->mm->mmap_sem);
}

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        memslot = id_to_memslot(kvm->memslots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (atomic_read(&kvm->online_vcpus) == 0) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
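
/*
 * Userspace sketch (hypothetical, for illustration): CMMA is switched on
 * before the first VCPU is created, via the VM attribute interface
 * handled above:
 *
 *      struct kvm_device_attr attr = {
 *              .group = KVM_S390_VM_MEM_CTRL,
 *              .attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *      };
 *      ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */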

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_mem_control(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        return -ENXIO;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                switch (attr->attr) {
                case KVM_S390_VM_MEM_ENABLE_CMMA:
                case KVM_S390_VM_MEM_CLR_CMMA:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
{
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
        struct kvm_device_attr attr;
        int r;

        switch (ioctl) {
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;

                r = -EFAULT;
                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        break;
                r = kvm_s390_inject_vm(kvm, &s390int);
                break;
        }
        case KVM_ENABLE_CAP: {
                struct kvm_enable_cap cap;

                r = -EFAULT;
                if (copy_from_user(&cap, argp, sizeof(cap)))
                        break;
                r = kvm_vm_ioctl_enable_cap(kvm, &cap);
                break;
        }
        case KVM_CREATE_IRQCHIP: {
                struct kvm_irq_routing_entry routing;

                r = -EINVAL;
                if (kvm->arch.use_irqchip) {
                        /* Set up dummy routing. */
                        memset(&routing, 0, sizeof(routing));
                        kvm_set_irq_routing(kvm, &routing, 0, 0);
                        r = 0;
                }
                break;
        }
        case KVM_SET_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_set_attr(kvm, &attr);
                break;
        }
        case KVM_GET_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_get_attr(kvm, &attr);
                break;
        }
        case KVM_HAS_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_has_attr(kvm, &attr);
                break;
        }
        default:
                r = -ENOTTY;
        }

        return r;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
        int rc;
        char debug_name[16];
        static unsigned long sca_offset;

        rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
        if (type & ~KVM_VM_S390_UCONTROL)
                goto out_err;
        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
                goto out_err;
#else
        if (type)
                goto out_err;
#endif

        rc = s390_enable_sie();
        if (rc)
                goto out_err;

        rc = -ENOMEM;

        kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
        if (!kvm->arch.sca)
                goto out_err;
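        /*
         * Stagger each VM's SCA within its page in 16-byte steps
         * (wrapping at 0x800), presumably so that the SCAs of
         * concurrently running VMs land on different cache lines.
         */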
        spin_lock(&kvm_lock);
        sca_offset = (sca_offset + 16) & 0x7f0;
        kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
        spin_unlock(&kvm_lock);

        sprintf(debug_name, "kvm-%u", current->pid);

        kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
        if (!kvm->arch.dbf)
                goto out_nodbf;

        spin_lock_init(&kvm->arch.float_int.lock);
        INIT_LIST_HEAD(&kvm->arch.float_int.list);
        init_waitqueue_head(&kvm->arch.ipte_wq);

        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "%s", "vm created");

        if (type & KVM_VM_S390_UCONTROL) {
                kvm->arch.gmap = NULL;
        } else {
                kvm->arch.gmap = gmap_alloc(current->mm);
                if (!kvm->arch.gmap)
                        goto out_nogmap;
                kvm->arch.gmap->private = kvm;
                kvm->arch.gmap->pfault_enabled = 0;
        }

        kvm->arch.css_support = 0;
        kvm->arch.use_irqchip = 0;

        spin_lock_init(&kvm->arch.start_stop_lock);

        return 0;
out_nogmap:
        debug_unregister(kvm->arch.dbf);
out_nodbf:
        free_page((unsigned long)(kvm->arch.sca));
out_err:
        return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
        kvm_s390_clear_local_irqs(vcpu);
        kvm_clear_async_pf_completion_queue(vcpu);
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                clear_bit(63 - vcpu->vcpu_id,
                          (unsigned long *) &vcpu->kvm->arch.sca->mcn);
                if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
                    (__u64) vcpu->arch.sie_block)
                        vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
        }
        smp_mb();

        if (kvm_is_ucontrol(vcpu->kvm))
                gmap_free(vcpu->arch.gmap);

        if (kvm_s390_cmma_enabled(vcpu->kvm))
                kvm_s390_vcpu_unsetup_cmma(vcpu);
        free_page((unsigned long)(vcpu->arch.sie_block));

        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_arch_vcpu_destroy(vcpu);

        mutex_lock(&kvm->lock);
        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
                kvm->vcpus[i] = NULL;

        atomic_set(&kvm->online_vcpus, 0);
        mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
{
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
        kvm_free_vcpus(kvm);
        free_page((unsigned long)(kvm->arch.sca));
        debug_unregister(kvm->arch.dbf);
        if (!kvm_is_ucontrol(kvm))
                gmap_free(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
}

/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
        kvm_clear_async_pf_completion_queue(vcpu);
        if (kvm_is_ucontrol(vcpu->kvm)) {
                vcpu->arch.gmap = gmap_alloc(current->mm);
                if (!vcpu->arch.gmap)
                        return -ENOMEM;
                vcpu->arch.gmap->private = vcpu->kvm;
                return 0;
        }

        vcpu->arch.gmap = vcpu->kvm->arch.gmap;
        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
                                    KVM_SYNC_GPRS |
                                    KVM_SYNC_ACRS |
                                    KVM_SYNC_CRS;
        return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
        /* Nothing to do */
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
        save_fp_regs(vcpu->arch.host_fpregs.fprs);
        save_access_regs(vcpu->arch.host_acrs);
        restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        gmap_enable(vcpu->arch.gmap);
        atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
        atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
        save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        save_fp_regs(vcpu->arch.guest_fpregs.fprs);
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
        restore_fp_regs(vcpu->arch.host_fpregs.fprs);
        restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
        /* this equals initial cpu reset in pop, but we don't switch to ESA */
        vcpu->arch.sie_block->gpsw.mask = 0UL;
        vcpu->arch.sie_block->gpsw.addr = 0UL;
        kvm_s390_set_prefix(vcpu, 0);
        vcpu->arch.sie_block->cputm     = 0UL;
        vcpu->arch.sie_block->ckc       = 0UL;
        vcpu->arch.sie_block->todpr     = 0;
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
        vcpu->arch.guest_fpregs.fpc = 0;
        asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
        vcpu->arch.sie_block->gbea = 1;
        vcpu->arch.sie_block->pp = 0;
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
        kvm_clear_async_pf_completion_queue(vcpu);
        kvm_s390_vcpu_stop(vcpu);
        kvm_s390_clear_local_irqs(vcpu);
}

int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
        return 0;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
        free_page(vcpu->arch.sie_block->cbrlo);
        vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
        vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
        if (!vcpu->arch.sie_block->cbrlo)
                return -ENOMEM;

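        /*
         * Our reading of these magic bits (hedged, not spelled out in
         * this file): 0x80 enables ESSA interpretation for CMMA, while
         * clearing 0x08 (PFMF interpretation, we assume) makes PFMF
         * intercept again while CMMA is in use.
         */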
        vcpu->arch.sie_block->ecb2 |= 0x80;
        vcpu->arch.sie_block->ecb2 &= ~0x08;
        return 0;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
        int rc = 0;

        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
                                                    CPUSTAT_STOPPED |
                                                    CPUSTAT_GED);
        vcpu->arch.sie_block->ecb   = 6;
        if (test_vfacility(50) && test_vfacility(73))
                vcpu->arch.sie_block->ecb |= 0x10;

        vcpu->arch.sie_block->ecb2  = 8;
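        /*
         * Per this patch's subject, the 0x10000000 bit in eca enables
         * interpretive execution of SIGP EXTERNAL CALL; the remaining
         * bits are assumed to carry over the previously set controls.
         */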
        vcpu->arch.sie_block->eca   = 0xD1002000U;
        if (sclp_has_siif())
                vcpu->arch.sie_block->eca |= 1;
        vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
        vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
        if (kvm_s390_cmma_enabled(vcpu->kvm)) {
                rc = kvm_s390_vcpu_setup_cmma(vcpu);
                if (rc)
                        return rc;
        }
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
        tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
                     (unsigned long) vcpu);
        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
        get_cpu_id(&vcpu->arch.cpu_id);
        vcpu->arch.cpu_id.version = 0xff;
        return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                                      unsigned int id)
{
        struct kvm_vcpu *vcpu;
        struct sie_page *sie_page;
        int rc = -EINVAL;

        if (id >= KVM_MAX_VCPUS)
                goto out;

        rc = -ENOMEM;

        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
        if (!vcpu)
                goto out;

        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
        if (!sie_page)
                goto out_free_cpu;

        vcpu->arch.sie_block = &sie_page->sie_block;
        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

        vcpu->arch.sie_block->icpua = id;
        if (!kvm_is_ucontrol(kvm)) {
                if (!kvm->arch.sca) {
                        WARN_ON_ONCE(1);
                        goto out_free_cpu;
                }
                if (!kvm->arch.sca->cpu[id].sda)
                        kvm->arch.sca->cpu[id].sda =
                                (__u64) vcpu->arch.sie_block;
                vcpu->arch.sie_block->scaoh =
                        (__u32)(((__u64)kvm->arch.sca) >> 32);
                vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
                set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
        }

        spin_lock_init(&vcpu->arch.local_int.lock);
        INIT_LIST_HEAD(&vcpu->arch.local_int.list);
        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
        vcpu->arch.local_int.wq = &vcpu->wq;
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
                goto out_free_sie_block;
        VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
                 vcpu->arch.sie_block);
        trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

        return vcpu;
out_free_sie_block:
        free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
        kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
        return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        return kvm_cpu_has_interrupt(vcpu);
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)
{
        atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
        atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest CPU out of SIE and wait until SIE is no longer running.
 * If the CPU is not running (e.g., waiting as idle), the function
 * returns immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
        atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
                cpu_relax();
}

/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
        s390_vcpu_block(vcpu);
        exit_sie(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
        int i;
        struct kvm *kvm = gmap->private;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                /* match against both prefix pages */
                if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
                        exit_sie_sync(vcpu);
                }
        }
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        /* kvm common code refers to this, but never calls it */
        BUG();
        return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
                                           struct kvm_one_reg *reg)
{
        int r = -EINVAL;

        switch (reg->id) {
        case KVM_REG_S390_TODPR:
                r = put_user(vcpu->arch.sie_block->todpr,
                             (u32 __user *)reg->addr);
                break;
        case KVM_REG_S390_EPOCHDIFF:
                r = put_user(vcpu->arch.sie_block->epoch,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CPU_TIMER:
                r = put_user(vcpu->arch.sie_block->cputm,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CLOCK_COMP:
                r = put_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFTOKEN:
                r = put_user(vcpu->arch.pfault_token,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFCOMPARE:
                r = put_user(vcpu->arch.pfault_compare,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFSELECT:
                r = put_user(vcpu->arch.pfault_select,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PP:
                r = put_user(vcpu->arch.sie_block->pp,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_GBEA:
                r = put_user(vcpu->arch.sie_block->gbea,
                             (u64 __user *)reg->addr);
                break;
        default:
                break;
        }

        return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
                                           struct kvm_one_reg *reg)
{
        int r = -EINVAL;

        switch (reg->id) {
        case KVM_REG_S390_TODPR:
                r = get_user(vcpu->arch.sie_block->todpr,
                             (u32 __user *)reg->addr);
                break;
        case KVM_REG_S390_EPOCHDIFF:
                r = get_user(vcpu->arch.sie_block->epoch,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CPU_TIMER:
                r = get_user(vcpu->arch.sie_block->cputm,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CLOCK_COMP:
                r = get_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFTOKEN:
                r = get_user(vcpu->arch.pfault_token,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFCOMPARE:
                r = get_user(vcpu->arch.pfault_compare,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFSELECT:
                r = get_user(vcpu->arch.pfault_select,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PP:
                r = get_user(vcpu->arch.sie_block->pp,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_GBEA:
                r = get_user(vcpu->arch.sie_block->gbea,
                             (u64 __user *)reg->addr);
                break;
        default:
                break;
        }

        return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
        return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
        return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
        restore_access_regs(vcpu->run->s.regs.acrs);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
        return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        if (test_fp_ctl(fpu->fpc))
                return -EINVAL;
        memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
        vcpu->arch.guest_fpregs.fpc = fpu->fpc;
        restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
        fpu->fpc = vcpu->arch.guest_fpregs.fpc;
        return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
        int rc = 0;

        if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
                rc = -EBUSY;
        else {
                vcpu->run->psw_mask = psw.mask;
                vcpu->run->psw_addr = psw.addr;
        }
        return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                  struct kvm_translation *tr)
{
        return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
                              KVM_GUESTDBG_USE_HW_BP | \
                              KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg)
{
        int rc = 0;

        vcpu->guest_debug = 0;
        kvm_s390_clear_bp_data(vcpu);

        if (dbg->control & ~VALID_GUESTDBG_FLAGS)
                return -EINVAL;

        if (dbg->control & KVM_GUESTDBG_ENABLE) {
                vcpu->guest_debug = dbg->control;
                /* enforce guest PER */
                atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

                if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
                        rc = kvm_s390_import_bp_data(vcpu, dbg);
        } else {
                atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
                vcpu->arch.guestdbg.last_bp = 0;
        }

        if (rc) {
                vcpu->guest_debug = 0;
                kvm_s390_clear_bp_data(vcpu);
                atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
        }

        return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
{
        return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
{
        return -EINVAL; /* not implemented yet */
}

bool kvm_s390_cmma_enabled(struct kvm *kvm)
{
        if (!MACHINE_IS_LPAR)
                return false;
        /* only enable for z10 and later */
        if (!MACHINE_HAS_EDAT1)
                return false;
        if (!kvm->arch.use_cmma)
                return false;
        return true;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
        return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
        s390_vcpu_unblock(vcpu);
        /*
         * We use MMU_RELOAD just to re-arm the ipte notifier for the
         * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
         * This ensures that the ipte instruction for this request has
         * already finished. We might race against a second unmapper that
         * wants to set the blocking bit. Let's just retry the request loop.
         */
        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
                int rc;

                rc = gmap_ipte_notify(vcpu->arch.gmap,
                                      vcpu->arch.sie_block->prefix,
                                      PAGE_SIZE * 2);
                if (rc)
                        return rc;
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
                if (!ibs_enabled(vcpu)) {
                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
                        atomic_set_mask(CPUSTAT_IBS,
                                        &vcpu->arch.sie_block->cpuflags);
                }
                goto retry;
        }

        if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
                if (ibs_enabled(vcpu)) {
                        trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
                        atomic_clear_mask(CPUSTAT_IBS,
                                          &vcpu->arch.sie_block->cpuflags);
                }
                goto retry;
        }

        return 0;
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
        struct mm_struct *mm = current->mm;
        hva_t hva;
        long rc;

        hva = gmap_fault(gpa, vcpu->arch.gmap);
        if (IS_ERR_VALUE(hva))
                return (long)hva;
        down_read(&mm->mmap_sem);
        rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
        up_read(&mm->mmap_sem);

        return rc < 0 ? rc : 0;
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
                                      unsigned long token)
{
        struct kvm_s390_interrupt inti;

        inti.parm64 = token;

        if (start_token) {
                inti.type = KVM_S390_INT_PFAULT_INIT;
                WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
        } else {
                inti.type = KVM_S390_INT_PFAULT_DONE;
                WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
        }
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                                     struct kvm_async_pf *work)
{
        trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
        __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                                 struct kvm_async_pf *work)
{
        trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
        __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
        /* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
        /*
         * s390 will always inject the page directly,
         * but we still want check_async_completion to cleanup
         */
        return true;
}

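/*
 * Heuristics for the (assumed) pfault handshake: only set up an async
 * fault if the guest advertised a valid pfault token, the PSW/CR0 state
 * allows the external interrupt, no interrupt is already pending, and
 * pfault is enabled on the gmap; otherwise the caller falls back to
 * synchronous fault-in.
 */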
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
        hva_t hva;
        struct kvm_arch_async_pf arch;
        int rc;

        if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
                return 0;
        if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
            vcpu->arch.pfault_compare)
                return 0;
        if (psw_extint_disabled(vcpu))
                return 0;
        if (kvm_cpu_has_interrupt(vcpu))
                return 0;
        if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
                return 0;
        if (!vcpu->arch.gmap->pfault_enabled)
                return 0;

        hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
        hva += current->thread.gmap_addr & ~PAGE_MASK;
        if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
                return 0;

        rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
        return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
        int rc, cpuflags;

        /*
         * On s390 notifications for arriving pages will be delivered directly
         * to the guest, but the housekeeping for completed pfaults is
         * handled outside the worker.
         */
        kvm_check_async_pf_completion(vcpu);

        memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

        if (need_resched())
                schedule();

        if (test_thread_flag(TIF_MCCK_PENDING))
                s390_handle_mcck();

        if (!kvm_is_ucontrol(vcpu->kvm))
                kvm_s390_deliver_pending_interrupts(vcpu);

        rc = kvm_s390_handle_requests(vcpu);
        if (rc)
                return rc;

        if (guestdbg_enabled(vcpu)) {
                kvm_s390_backup_guest_per_regs(vcpu);
                kvm_s390_patch_guest_per_regs(vcpu);
        }

        vcpu->arch.sie_block->icptcode = 0;
        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
        trace_kvm_s390_sie_enter(vcpu, cpuflags);

        return 0;
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
        int rc = -1;

        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

        if (guestdbg_enabled(vcpu))
                kvm_s390_restore_guest_per_regs(vcpu);

        if (exit_reason >= 0) {
                rc = 0;
        } else if (kvm_is_ucontrol(vcpu->kvm)) {
                vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
                vcpu->run->s390_ucontrol.trans_exc_code =
                                                current->thread.gmap_addr;
                vcpu->run->s390_ucontrol.pgm_code = 0x10;
                rc = -EREMOTE;

        } else if (current->thread.gmap_pfault) {
                trace_kvm_s390_major_guest_pfault(vcpu);
                current->thread.gmap_pfault = 0;
                if (kvm_arch_setup_async_pf(vcpu)) {
                        rc = 0;
                } else {
                        gpa_t gpa = current->thread.gmap_addr;

                        rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
                }
        }

        if (rc == -1) {
                VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
                trace_kvm_s390_sie_fault(vcpu);
                rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
        }

        memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

        if (rc == 0) {
                if (kvm_is_ucontrol(vcpu->kvm))
                        /* Don't exit for host interrupts. */
                        rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
                else
                        rc = kvm_handle_sie_intercept(vcpu);
        }

        return rc;
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
        int rc, exit_reason;

        /*
         * We try to hold kvm->srcu during most of vcpu_run (except when
         * running the guest), so that memslots (and other stuff) are
         * protected.
         */
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        do {
                rc = vcpu_pre_run(vcpu);
                if (rc)
                        break;

                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
                /*
                 * As PF_VCPU will be used in the fault handler, there
                 * should be no uaccess between guest_enter and
                 * guest_exit.
                 */
                preempt_disable();
                kvm_guest_enter();
                preempt_enable();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                kvm_guest_exit();
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

                rc = vcpu_post_run(vcpu, exit_reason);
        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        int rc;
        sigset_t sigsaved;

        if (guestdbg_exit_pending(vcpu)) {
                kvm_s390_prepare_debug_exit(vcpu);
                return 0;
        }

        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

        kvm_s390_vcpu_start(vcpu);

        switch (kvm_run->exit_reason) {
        case KVM_EXIT_S390_SIEIC:
        case KVM_EXIT_UNKNOWN:
        case KVM_EXIT_INTR:
        case KVM_EXIT_S390_RESET:
        case KVM_EXIT_S390_UCONTROL:
        case KVM_EXIT_S390_TSCH:
        case KVM_EXIT_DEBUG:
                break;
        default:
                BUG();
        }

        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
                kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
                kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
        }

        might_fault();
        rc = __vcpu_run(vcpu);

        if (signal_pending(current) && !rc) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
                rc = -EINTR;
        }

        if (guestdbg_exit_pending(vcpu) && !rc) {
                kvm_s390_prepare_debug_exit(vcpu);
                rc = 0;
        }

        if (rc == -EOPNOTSUPP) {
                /* intercept cannot be handled in-kernel, prepare kvm-run */
                kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
                kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
                kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
                kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
                rc = 0;
        }

        if (rc == -EREMOTE) {
                /* intercept was handled, but userspace support is needed;
                 * kvm_run has been prepared by the handler */
                rc = 0;
        }

        kvm_run->psw_mask     = vcpu->arch.sie_block->gpsw.mask;
        kvm_run->psw_addr     = vcpu->arch.sie_block->gpsw.addr;
        kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);

        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);

        vcpu->stat.exit_userspace++;
        return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
        unsigned char archmode = 1;
        u64 clkcomp;
        int rc;

        if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
                if (write_guest_abs(vcpu, 163, &archmode, 1))
                        return -EFAULT;
                gpa = SAVE_AREA_BASE;
        } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
                if (write_guest_real(vcpu, 163, &archmode, 1))
                        return -EFAULT;
                gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
        }
        rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
                             vcpu->arch.guest_fpregs.fprs, 128);
        rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
                              vcpu->run->s.regs.gprs, 128);
        rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
                              &vcpu->arch.sie_block->gpsw, 16);
        rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
                              &vcpu->arch.sie_block->prefix, 4);
        rc |= write_guest_abs(vcpu,
                              gpa + offsetof(struct save_area, fp_ctrl_reg),
                              &vcpu->arch.guest_fpregs.fpc, 4);
        rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
                              &vcpu->arch.sie_block->todpr, 4);
        rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
                              &vcpu->arch.sie_block->cputm, 8);
        clkcomp = vcpu->arch.sie_block->ckc >> 8;
        rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
                              &clkcomp, 8);
        rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
                              &vcpu->run->s.regs.acrs, 64);
        rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
                              &vcpu->arch.sie_block->gcr, 128);
        return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
        /*
         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
         * copying in vcpu load/put. Let's update our copies before we save
         * them into the save area.
         */
        save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        save_fp_regs(vcpu->arch.guest_fpregs.fprs);
        save_access_regs(vcpu->run->s.regs.acrs);

        return kvm_s390_store_status_unloaded(vcpu, addr);
}

static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
{
        return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
        kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
        exit_sie_sync(vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                __disable_ibs_on_vcpu(vcpu);
        }
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
        kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
        exit_sie_sync(vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
        int i, online_vcpus, started_vcpus = 0;

        if (!is_vcpu_stopped(vcpu))
                return;

        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
        /* Only one cpu at a time may enter/leave the STOPPED state. */
        spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

        for (i = 0; i < online_vcpus; i++) {
                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
                        started_vcpus++;
        }

        if (started_vcpus == 0) {
                /* we're the only active VCPU -> speed it up */
                __enable_ibs_on_vcpu(vcpu);
        } else if (started_vcpus == 1) {
                /*
                 * As we are starting a second VCPU, we have to disable
                 * the IBS facility on all VCPUs to remove potentially
                 * outstanding ENABLE requests.
1465                  */
1466                 __disable_ibs_on_all_vcpus(vcpu->kvm);
1467         }
1468
1469         atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
1470         /*
1471          * Another VCPU might have used IBS while we were offline.
1472          * Let's play safe and flush the VCPU at startup.
1473          */
1474         vcpu->arch.sie_block->ihcpu  = 0xffff;
1475         spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
1476         return;
1477 }
1478
1479 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
1480 {
1481         int i, online_vcpus, started_vcpus = 0;
1482         struct kvm_vcpu *started_vcpu = NULL;
1483
1484         if (is_vcpu_stopped(vcpu))
1485                 return;
1486
1487         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
1488         /* Only one cpu at a time may enter/leave the STOPPED state. */
1489         spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
1490         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
1491
1492         atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
1493         __disable_ibs_on_vcpu(vcpu);
1494
1495         for (i = 0; i < online_vcpus; i++) {
1496                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
1497                         started_vcpus++;
1498                         started_vcpu = vcpu->kvm->vcpus[i];
1499                 }
1500         }
1501
1502         if (started_vcpus == 1) {
1503                 /*
1504                  * As we only have one VCPU left, we want to enable the
1505                  * IBS facility for that VCPU to speed it up.
1506                  */
1507                 __enable_ibs_on_vcpu(started_vcpu);
1508         }
1509
1510         spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
1511         return;
1512 }
1513
1514 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
1515                                      struct kvm_enable_cap *cap)
1516 {
1517         int r;
1518
1519         if (cap->flags)
1520                 return -EINVAL;
1521
1522         switch (cap->cap) {
1523         case KVM_CAP_S390_CSS_SUPPORT:
1524                 if (!vcpu->kvm->arch.css_support) {
1525                         vcpu->kvm->arch.css_support = 1;
1526                         trace_kvm_s390_enable_css(vcpu->kvm);
1527                 }
1528                 r = 0;
1529                 break;
1530         default:
1531                 r = -EINVAL;
1532                 break;
1533         }
1534         return r;
1535 }
1536
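/*
 * Dispatcher for the VCPU ioctls: interrupt injection, status store,
 * initial PSW/reset, one-reg access, ucontrol address space mapping
 * and capability enablement. As a hedged sketch, injecting an
 * emergency signal from userspace looks roughly like this (vcpu_fd is
 * an assumption, and .parm is assumed to carry the sending CPU
 * address):
 *
 *	struct kvm_s390_interrupt s390int = {
 *		.type = KVM_S390_INT_EMERGENCY,
 *		.parm = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_INTERRUPT, &s390int);
 */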
1537 long kvm_arch_vcpu_ioctl(struct file *filp,
1538                          unsigned int ioctl, unsigned long arg)
1539 {
1540         struct kvm_vcpu *vcpu = filp->private_data;
1541         void __user *argp = (void __user *)arg;
1542         int idx;
1543         long r;
1544
1545         switch (ioctl) {
1546         case KVM_S390_INTERRUPT: {
1547                 struct kvm_s390_interrupt s390int;
1548
1549                 r = -EFAULT;
1550                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1551                         break;
1552                 r = kvm_s390_inject_vcpu(vcpu, &s390int);
1553                 break;
1554         }
1555         case KVM_S390_STORE_STATUS:
1556                 idx = srcu_read_lock(&vcpu->kvm->srcu);
1557                 r = kvm_s390_vcpu_store_status(vcpu, arg);
1558                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1559                 break;
1560         case KVM_S390_SET_INITIAL_PSW: {
1561                 psw_t psw;
1562
1563                 r = -EFAULT;
1564                 if (copy_from_user(&psw, argp, sizeof(psw)))
1565                         break;
1566                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
1567                 break;
1568         }
1569         case KVM_S390_INITIAL_RESET:
1570                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
1571                 break;
1572         case KVM_SET_ONE_REG:
1573         case KVM_GET_ONE_REG: {
1574                 struct kvm_one_reg reg;
1575                 r = -EFAULT;
1576                 if (copy_from_user(&reg, argp, sizeof(reg)))
1577                         break;
1578                 if (ioctl == KVM_SET_ONE_REG)
1579                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
1580                 else
1581                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
1582                 break;
1583         }
1584 #ifdef CONFIG_KVM_S390_UCONTROL
1585         case KVM_S390_UCAS_MAP: {
1586                 struct kvm_s390_ucas_mapping ucasmap;
1587
1588                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
1589                         r = -EFAULT;
1590                         break;
1591                 }
1592
1593                 if (!kvm_is_ucontrol(vcpu->kvm)) {
1594                         r = -EINVAL;
1595                         break;
1596                 }
1597
1598                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
1599                                      ucasmap.vcpu_addr, ucasmap.length);
1600                 break;
1601         }
1602         case KVM_S390_UCAS_UNMAP: {
1603                 struct kvm_s390_ucas_mapping ucasmap;
1604
1605                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
1606                         r = -EFAULT;
1607                         break;
1608                 }
1609
1610                 if (!kvm_is_ucontrol(vcpu->kvm)) {
1611                         r = -EINVAL;
1612                         break;
1613                 }
1614
1615                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
1616                         ucasmap.length);
1617                 break;
1618         }
1619 #endif
1620         case KVM_S390_VCPU_FAULT: {
1621                 r = gmap_fault(arg, vcpu->arch.gmap);
1622                 if (!IS_ERR_VALUE(r))
1623                         r = 0;
1624                 break;
1625         }
1626         case KVM_ENABLE_CAP:
1627         {
1628                 struct kvm_enable_cap cap;
1629                 r = -EFAULT;
1630                 if (copy_from_user(&cap, argp, sizeof(cap)))
1631                         break;
1632                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
1633                 break;
1634         }
1635         default:
1636                 r = -ENOTTY;
1637         }
1638         return r;
1639 }
1640
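/*
 * For user-controlled (ucontrol) VMs the SIE control block is exposed
 * to userspace: an mmap() of the VCPU fd at page offset
 * KVM_S390_SIE_PAGE_OFFSET maps the sie_block page. All other faults
 * raise SIGBUS.
 */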
1641 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1642 {
1643 #ifdef CONFIG_KVM_S390_UCONTROL
1644         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
1645                  && (kvm_is_ucontrol(vcpu->kvm))) {
1646                 vmf->page = virt_to_page(vcpu->arch.sie_block);
1647                 get_page(vmf->page);
1648                 return 0;
1649         }
1650 #endif
1651         return VM_FAULT_SIGBUS;
1652 }
1653
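/*
 * s390 keeps no architecture-specific per-memslot state, which is
 * presumably why the three memslot hooks below are empty stubs.
 */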
1654 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
1655                            struct kvm_memory_slot *dont)
1656 {
1657 }
1658
1659 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
1660                             unsigned long npages)
1661 {
1662         return 0;
1663 }
1664
1665 void kvm_arch_memslots_updated(struct kvm *kvm)
1666 {
1667 }
1668
1669 /* Section: memory related */
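/*
 * A hedged userspace sketch of registering guest memory (vm_fd and buf
 * are assumptions; both userspace_addr and memory_size must be 1MB
 * aligned, matching the checks in kvm_arch_prepare_memory_region()
 * below):
 *
 *	struct kvm_userspace_memory_region mem = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = 1UL << 20,
 *		.userspace_addr = (unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
 */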
1670 int kvm_arch_prepare_memory_region(struct kvm *kvm,
1671                                    struct kvm_memory_slot *memslot,
1672                                    struct kvm_userspace_memory_region *mem,
1673                                    enum kvm_mr_change change)
1674 {
1675         /* A few sanity checks. Memory slots have to start and end on a
1676            segment boundary (1MB), while the memory backing them in userland
1677            may be fragmented into multiple vmas. It is fine to mmap() and
1678            munmap() within such a slot at any time after this call. */
1679
1680         if (mem->userspace_addr & 0xffffful)
1681                 return -EINVAL;
1682
1683         if (mem->memory_size & 0xffffful)
1684                 return -EINVAL;
1685
1686         return 0;
1687 }
1688
1689 void kvm_arch_commit_memory_region(struct kvm *kvm,
1690                                 struct kvm_userspace_memory_region *mem,
1691                                 const struct kvm_memory_slot *old,
1692                                 enum kvm_mr_change change)
1693 {
1694         int rc;
1695
1696         /* If the basics of the memslot do not change, we do not want
1697          * to update the gmap. Every update causes several unnecessary
1698          * segment translation exceptions. This is usually handled just
1699          * fine by the normal fault handler + gmap, but it will also
1700          * cause faults on the prefix page of running guest CPUs.
1701          */
1702         if (old->userspace_addr == mem->userspace_addr &&
1703             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
1704             old->npages * PAGE_SIZE == mem->memory_size)
1705                 return;
1706
1707         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
1708                 mem->guest_phys_addr, mem->memory_size);
1709         if (rc)
1710                 printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
1711         return;
1712 }
1713
1714 void kvm_arch_flush_shadow_all(struct kvm *kvm)
1715 {
1716 }
1717
1718 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
1719                                    struct kvm_memory_slot *slot)
1720 {
1721 }
1722
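/*
 * Module init: register with the generic KVM core and allocate the
 * facility list page advertised to guests. The masks applied to
 * vfacilities below whitelist only those facility bits that KVM is
 * known to handle.
 */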
1723 static int __init kvm_s390_init(void)
1724 {
1725         int ret;
1726         ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1727         if (ret)
1728                 return ret;
1729
1730         /*
1731          * guests can ask for up to 255+1 double words, so we need a full page
1732          * to hold the maximum amount of facilities. On the other hand, we
1733          * only set facilities that are known to work in KVM.
1734          */
1735         vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
1736         if (!vfacilities) {
1737                 kvm_exit();
1738                 return -ENOMEM;
1739         }
1740         memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
1741         vfacilities[0] &= 0xff82fff3f4fc2000UL;
1742         vfacilities[1] &= 0x005c000000000000UL;
1743         return 0;
1744 }
1745
1746 static void __exit kvm_s390_exit(void)
1747 {
1748         free_page((unsigned long) vfacilities);
1749         kvm_exit();
1750 }
1751
1752 module_init(kvm_s390_init);
1753 module_exit(kvm_s390_exit);
1754
1755 /*
1756  * Enable autoloading of the kvm module.
1757  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
1758  * since x86 takes a different approach.
1759  */
1760 #include <linux/miscdevice.h>
1761 MODULE_ALIAS_MISCDEV(KVM_MINOR);
1762 MODULE_ALIAS("devname:kvm");