// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>
#include <asm/timex.h>

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
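/*
 * Exit, interrupt-delivery and instruction counters, exported via debugfs
 * with one file per counter. Each entry maps a file name to a field of
 * struct kvm_vcpu_stat.
 */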
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "deliver_io_interrupt", VCPU_STAT(deliver_io_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[9];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
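/*
 * Probe a PERFORM LOCKED OPERATION subfunction: function code bit 0x100
 * selects "test bit", for which the parameter registers are ignored and
 * condition code 0 indicates the subfunction is available.
 */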
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}
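/*
 * Transfer the gmap dirty state of all pages of a memslot into KVM's
 * dirty bitmap, bailing out early if a fatal signal is pending.
 */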
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
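/*
 * VM-wide capabilities that user space may toggle. Capabilities that
 * change the facility lists can only be enabled as long as no vCPU has
 * been created yet.
 */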
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
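/* Device attribute handlers for memory control (CMMA and memory limit) */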
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the first slot. They are reverse sorted by base_gfn, so
		 * the first slot is also the one at the end of the address
		 * space. We have verified above that at least one slot is
		 * present.
		 */
		ms = slots->memslots;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		/* We have to wait for the essa emulation to finish */
		synchronize_srcu(&kvm->srcu);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = 0;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		res = -ENXIO;
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}
static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
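/*
 * TOD (time-of-day) clock attributes. With the multiple-epoch facility
 * (facility 139), the guest TOD clock is extended by an 8-bit epoch index.
 */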
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				       struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
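/*
 * Query the adjunct processor configuration (PQAP with the QCI function
 * code) to detect APXA; the result determines the CRYCB format used below.
 */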
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}
static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
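/*
 * Note: sca_offset below staggers each new VM's basic SCA within its page,
 * so consecutively created VMs start at different 16-byte offsets.
 */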
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	if (kvm->arch.migration_state) {
		vfree(kvm->arch.migration_state->pgste_bitmap);
		kfree(kvm->arch.migration_state);
	}
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
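/*
 * Adding a vCPU beyond the basic SCA slots triggers a one-time switch to
 * the extended SCA, provided the machine has ESCA and 64-bit SCAO support.
 */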
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
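/*
 * Guest CPU timer accounting: while enabled, cputm_start holds the TOD
 * value at the last start of accounting, and a seqcount lets other threads
 * read a consistent timer value without taking a lock.
 */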
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
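
/*
 * Summary of the seqcount protocol used by the cpu timer helpers above:
 * all writers run with preemption disabled and bump cputm_seqcount to an
 * odd value while cputm/cputm_start are in flux. A reader on another
 * thread retries its read section until it observes an even (stable)
 * count; starting the retry check from (seq & ~1) forces a retry
 * whenever the initial read raced with an active writer.
 */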
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;

	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
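
/*
 * Callers that need the VCPU to stay out of SIE for longer pair this
 * with PROG_BLOCK_SIE: kvm_s390_vcpu_block() above sets the bit and then
 * calls exit_sie(), and SIE is not re-entered until
 * kvm_s390_vcpu_unblock() clears the bit again.
 */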
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
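
/*
 * Illustrative userspace sketch for the ONE_REG accessors above (not
 * part of this file's build; vcpu_fd is an assumed open VCPU file
 * descriptor):
 *
 *	struct kvm_one_reg reg;
 *	__u64 cputm;
 *
 *	reg.id = KVM_REG_S390_CPU_TIMER;
 *	reg.addr = (__u64)&cputm;
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) == 0)
 *		printf("cpu timer: %llu\n", (unsigned long long)cputm);
 */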
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMMA virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMMA virtualization if CMMA is available and
		 * CMMA has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.use_cmma))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	return 0;
}
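
/*
 * Note on the request loop above: each handled request jumps back to the
 * retry label after the PROG_REQUEST bit was cleared by
 * kvm_s390_vcpu_request_handled(), so requests that are re-armed while
 * another one is being processed are still seen before the VCPU
 * re-enters SIE.
 */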
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
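
/*
 * A note on the epoch arithmetic above: SIE presents the guest with
 * guest TOD = host TOD + epoch (modulo 2^64). When the requested guest
 * TOD lies behind the host TOD, gtod->tod - htod.tod wraps; with the
 * multiple-epoch facility (stfle 139) the borrow is propagated into the
 * epoch index (epdx) so the extended guest clock stays consistent.
 */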
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
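
/*
 * Summary of the gating above: a pfault token is only handed to the
 * common async_pf code if userspace configured one, the guest PSW
 * matches the configured select/compare mask, external interrupts are
 * enabled, no interrupt is already pending, the corresponding subclass
 * bit in CR0 (the 0x200ul test) is set and the gmap has pfault enabled;
 * the token itself is (re-)read from guest real storage.
 */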
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
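
/*
 * Note on the loop above: interrupts are only disabled around the
 * guest_enter/guest_exit transitions, not across sie64a() itself. The
 * software cpu timer accounting is switched off while the guest runs in
 * SIE because the hardware then steps the guest CPU timer directly; the
 * helpers only account the time spent outside of SIE.
 */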
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}
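
/*
 * The fprs/vrs handover in sync_regs() implements the lazy FPU switch:
 * the host (userspace) register state is parked in vcpu->arch.host_fpregs
 * and current->thread.fpu is redirected to the guest copy in the kvm_run
 * area, so the common lazy save/restore machinery transparently operates
 * on guest registers until store_regs() flips the pointers back.
 */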
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
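
/*
 * Layout note (following the header comment above): for
 * KVM_S390_STORE_STATUS_NOADDR, gpa is set to 0 and each field is
 * written at its __LC_*_SAVE_AREA offset, i.e. into the architected
 * save area starting at absolute address 0x1200; for
 * KVM_S390_STORE_STATUS_PREFIXED the same offsets are applied relative
 * to the VCPU's prefix page.
 */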
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
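
/*
 * IBS note for the start/stop functions above: the facility only pays
 * off while exactly one VCPU is running, so starting a second VCPU
 * disables IBS on all VCPUs, and stopping the next-to-last VCPU
 * re-enables it on the sole remaining runner.
 */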
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
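
/*
 * Illustrative userspace sketch for KVM_S390_MEM_OP (not part of this
 * file's build; vcpu_fd and buf are assumptions for the example):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */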
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
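
/*
 * Mechanically: (sclp.hmfai << i * 2) >> 30 extracts the i-th 2-bit
 * field (counted from the most significant end) of hmfai, and the
 * 48-bit mask is shifted right in 16-bit steps accordingly, so
 * doubleword i of the facility list loses its first 16, 32, 48 or all
 * 64 bits.
 */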
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");