arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/module.h>
  25 #include <linux/random.h>
  26 #include <linux/slab.h>
  27 #include <linux/timer.h>
  28 #include <linux/vmalloc.h>
  29 #include <asm/asm-offsets.h>
  30 #include <asm/lowcore.h>
  31 #include <asm/etr.h>
  32 #include <asm/pgtable.h>
  33 #include <asm/nmi.h>
  34 #include <asm/switch_to.h>
  35 #include <asm/isc.h>
  36 #include <asm/sclp.h>
  37 #include "kvm-s390.h"
  38 #include "gaccess.h"
  39
  40 #define KMSG_COMPONENT "kvm-s390"
  41 #undef pr_fmt
  42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  43
  44 #define CREATE_TRACE_POINTS
  45 #include "trace.h"
  46 #include "trace-s390.h"
  47
  48 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  49 #define LOCAL_IRQS 32
  50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  51                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  52
  53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  54
  55 struct kvm_stats_debugfs_item debugfs_entries[] = {
  56         { "userspace_handled", VCPU_STAT(exit_userspace) },
  57         { "exit_null", VCPU_STAT(exit_null) },
  58         { "exit_validity", VCPU_STAT(exit_validity) },
  59         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  60         { "exit_external_request", VCPU_STAT(exit_external_request) },
  61         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  62         { "exit_instruction", VCPU_STAT(exit_instruction) },
  63         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  64         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  65         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  66         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  67         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  68         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  69         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  70         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  71         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  72         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  73         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  74         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  75         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  76         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  77         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  78         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  79         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  80         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  81         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  82         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  83         { "instruction_spx", VCPU_STAT(instruction_spx) },
  84         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  85         { "instruction_stap", VCPU_STAT(instruction_stap) },
  86         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  87         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  88         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  89         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  90         { "instruction_essa", VCPU_STAT(instruction_essa) },
  91         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
  92         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
  93         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
  94         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
  95         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
  96         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
  97         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
  98         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
  99         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 100         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 101         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 102         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 103         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 104         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 105         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 106         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 107         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 108         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 109         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 110         { "diagnose_10", VCPU_STAT(diagnose_10) },
 111         { "diagnose_44", VCPU_STAT(diagnose_44) },
 112         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 113         { "diagnose_258", VCPU_STAT(diagnose_258) },
 114         { "diagnose_308", VCPU_STAT(diagnose_308) },
 115         { "diagnose_500", VCPU_STAT(diagnose_500) },
 116         { NULL }
 117 };
 118
 119 /* upper facilities limit for kvm */
 120 unsigned long kvm_s390_fac_list_mask[] = {
 121         0xffe6fffbfcfdfc40UL,
 122         0x005e800000000000UL,
 123 };
 124
 125 unsigned long kvm_s390_fac_list_mask_size(void)
 126 {
 127         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 128         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 129 }
 130
 131 static struct gmap_notifier gmap_notifier;
 132 debug_info_t *kvm_s390_dbf;
 133
 134 /* Section: not file related */
 135 int kvm_arch_hardware_enable(void)
 136 {
 137         /* every s390 is virtualization enabled ;-) */
 138         return 0;
 139 }
 140
 141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 142
 143 /*
 144  * This callback is executed during stop_machine(). All CPUs are therefore
 145  * temporarily stopped. In order not to change guest behavior, we have to
 146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 147  * so a CPU won't be stopped while calculating with the epoch.
 148  */
 149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 150                           void *v)
 151 {
 152         struct kvm *kvm;
 153         struct kvm_vcpu *vcpu;
 154         int i;
 155         unsigned long long *delta = v;
 156
 157         list_for_each_entry(kvm, &vm_list, vm_list) {
 158                 kvm->arch.epoch -= *delta;
 159                 kvm_for_each_vcpu(i, vcpu, kvm) {
 160                         vcpu->arch.sie_block->epoch -= *delta;
 161                 }
 162         }
 163         return NOTIFY_OK;
 164 }
 165
 166 static struct notifier_block kvm_clock_notifier = {
 167         .notifier_call = kvm_clock_sync,
 168 };
 169
 170 int kvm_arch_hardware_setup(void)
 171 {
 172         gmap_notifier.notifier_call = kvm_gmap_notifier;
 173         gmap_register_ipte_notifier(&gmap_notifier);
 174         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 175                                        &kvm_clock_notifier);
 176         return 0;
 177 }
 178
 179 void kvm_arch_hardware_unsetup(void)
 180 {
 181         gmap_unregister_ipte_notifier(&gmap_notifier);
 182         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 183                                          &kvm_clock_notifier);
 184 }
 185
 186 int kvm_arch_init(void *opaque)
 187 {
 188         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 189         if (!kvm_s390_dbf)
 190                 return -ENOMEM;
 191
 192         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 193                 debug_unregister(kvm_s390_dbf);
 194                 return -ENOMEM;
 195         }
 196
 197         /* Register floating interrupt controller interface. */
 198         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 199 }
 200
 201 void kvm_arch_exit(void)
 202 {
 203         debug_unregister(kvm_s390_dbf);
 204 }
 205
 206 /* Section: device related */
 207 long kvm_arch_dev_ioctl(struct file *filp,
 208                         unsigned int ioctl, unsigned long arg)
 209 {
 210         if (ioctl == KVM_S390_ENABLE_SIE)
 211                 return s390_enable_sie();
 212         return -EINVAL;
 213 }
 214
 215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 216 {
 217         int r;
 218
 219         switch (ext) {
 220         case KVM_CAP_S390_PSW:
 221         case KVM_CAP_S390_GMAP:
 222         case KVM_CAP_SYNC_MMU:
 223 #ifdef CONFIG_KVM_S390_UCONTROL
 224         case KVM_CAP_S390_UCONTROL:
 225 #endif
 226         case KVM_CAP_ASYNC_PF:
 227         case KVM_CAP_SYNC_REGS:
 228         case KVM_CAP_ONE_REG:
 229         case KVM_CAP_ENABLE_CAP:
 230         case KVM_CAP_S390_CSS_SUPPORT:
 231         case KVM_CAP_IOEVENTFD:
 232         case KVM_CAP_DEVICE_CTRL:
 233         case KVM_CAP_ENABLE_CAP_VM:
 234         case KVM_CAP_S390_IRQCHIP:
 235         case KVM_CAP_VM_ATTRIBUTES:
 236         case KVM_CAP_MP_STATE:
 237         case KVM_CAP_S390_INJECT_IRQ:
 238         case KVM_CAP_S390_USER_SIGP:
 239         case KVM_CAP_S390_USER_STSI:
 240         case KVM_CAP_S390_SKEYS:
 241         case KVM_CAP_S390_IRQ_STATE:
 242                 r = 1;
 243                 break;
 244         case KVM_CAP_S390_MEM_OP:
 245                 r = MEM_OP_MAX_SIZE;
 246                 break;
 247         case KVM_CAP_NR_VCPUS:
 248         case KVM_CAP_MAX_VCPUS:
 249                 r = KVM_MAX_VCPUS;
 250                 break;
 251         case KVM_CAP_NR_MEMSLOTS:
 252                 r = KVM_USER_MEM_SLOTS;
 253                 break;
 254         case KVM_CAP_S390_COW:
 255                 r = MACHINE_HAS_ESOP;
 256                 break;
 257         case KVM_CAP_S390_VECTOR_REGISTERS:
 258                 r = MACHINE_HAS_VX;
 259                 break;
 260         default:
 261                 r = 0;
 262         }
 263         return r;
 264 }
 265
 266 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 267                                         struct kvm_memory_slot *memslot)
 268 {
 269         gfn_t cur_gfn, last_gfn;
 270         unsigned long address;
 271         struct gmap *gmap = kvm->arch.gmap;
 272
 273         down_read(&gmap->mm->mmap_sem);
 274         /* Loop over all guest pages */
 275         last_gfn = memslot->base_gfn + memslot->npages;
 276         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 277                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 278
 279                 if (gmap_test_and_clear_dirty(address, gmap))
 280                         mark_page_dirty(kvm, cur_gfn);
 281         }
 282         up_read(&gmap->mm->mmap_sem);
 283 }
 284
 285 /* Section: vm related */
 286 /*
 287  * Get (and clear) the dirty memory log for a memory slot.
 288  */
 289 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 290                                struct kvm_dirty_log *log)
 291 {
 292         int r;
 293         unsigned long n;
 294         struct kvm_memslots *slots;
 295         struct kvm_memory_slot *memslot;
 296         int is_dirty = 0;
 297
 298         mutex_lock(&kvm->slots_lock);
 299
 300         r = -EINVAL;
 301         if (log->slot >= KVM_USER_MEM_SLOTS)
 302                 goto out;
 303
 304         slots = kvm_memslots(kvm);
 305         memslot = id_to_memslot(slots, log->slot);
 306         r = -ENOENT;
 307         if (!memslot->dirty_bitmap)
 308                 goto out;
 309
 310         kvm_s390_sync_dirty_log(kvm, memslot);
 311         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 312         if (r)
 313                 goto out;
 314
 315         /* Clear the dirty log */
 316         if (is_dirty) {
 317                 n = kvm_dirty_bitmap_bytes(memslot);
 318                 memset(memslot->dirty_bitmap, 0, n);
 319         }
 320         r = 0;
 321 out:
 322         mutex_unlock(&kvm->slots_lock);
 323         return r;
 324 }
 325
 326 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 327 {
 328         int r;
 329
 330         if (cap->flags)
 331                 return -EINVAL;
 332
 333         switch (cap->cap) {
 334         case KVM_CAP_S390_IRQCHIP:
 335                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 336                 kvm->arch.use_irqchip = 1;
 337                 r = 0;
 338                 break;
 339         case KVM_CAP_S390_USER_SIGP:
 340                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 341                 kvm->arch.user_sigp = 1;
 342                 r = 0;
 343                 break;
 344         case KVM_CAP_S390_VECTOR_REGISTERS:
 345                 if (MACHINE_HAS_VX) {
 346                         set_kvm_facility(kvm->arch.model.fac->mask, 129);
 347                         set_kvm_facility(kvm->arch.model.fac->list, 129);
 348                         r = 0;
 349                 } else
 350                         r = -EINVAL;
 351                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 352                          r ? "(not available)" : "(success)");
 353                 break;
 354         case KVM_CAP_S390_USER_STSI:
 355                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 356                 kvm->arch.user_stsi = 1;
 357                 r = 0;
 358                 break;
 359         default:
 360                 r = -EINVAL;
 361                 break;
 362         }
 363         return r;
 364 }
 365
 366 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 367 {
 368         int ret;
 369
 370         switch (attr->attr) {
 371         case KVM_S390_VM_MEM_LIMIT_SIZE:
 372                 ret = 0;
 373                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 374                          kvm->arch.gmap->asce_end);
 375                 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
 376                         ret = -EFAULT;
 377                 break;
 378         default:
 379                 ret = -ENXIO;
 380                 break;
 381         }
 382         return ret;
 383 }
 384
 385 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 386 {
 387         int ret;
 388         unsigned int idx;
 389         switch (attr->attr) {
 390         case KVM_S390_VM_MEM_ENABLE_CMMA:
 391                 /* enable CMMA only for z10 and later (EDAT_1) */
 392                 ret = -EINVAL;
 393                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
 394                         break;
 395
 396                 ret = -EBUSY;
 397                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 398                 mutex_lock(&kvm->lock);
 399                 if (atomic_read(&kvm->online_vcpus) == 0) {
 400                         kvm->arch.use_cmma = 1;
 401                         ret = 0;
 402                 }
 403                 mutex_unlock(&kvm->lock);
 404                 break;
 405         case KVM_S390_VM_MEM_CLR_CMMA:
 406                 ret = -EINVAL;
 407                 if (!kvm->arch.use_cmma)
 408                         break;
 409
 410                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 411                 mutex_lock(&kvm->lock);
 412                 idx = srcu_read_lock(&kvm->srcu);
 413                 s390_reset_cmma(kvm->arch.gmap->mm);
 414                 srcu_read_unlock(&kvm->srcu, idx);
 415                 mutex_unlock(&kvm->lock);
 416                 ret = 0;
 417                 break;
 418         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 419                 unsigned long new_limit;
 420
 421                 if (kvm_is_ucontrol(kvm))
 422                         return -EINVAL;
 423
 424                 if (get_user(new_limit, (u64 __user *)attr->addr))
 425                         return -EFAULT;
 426
 427                 if (new_limit > kvm->arch.gmap->asce_end)
 428                         return -E2BIG;
 429
 430                 ret = -EBUSY;
 431                 mutex_lock(&kvm->lock);
 432                 if (atomic_read(&kvm->online_vcpus) == 0) {
 433                         /* gmap_alloc will round the limit up */
 434                         struct gmap *new = gmap_alloc(current->mm, new_limit);
 435
 436                         if (!new) {
 437                                 ret = -ENOMEM;
 438                         } else {
 439                                 gmap_free(kvm->arch.gmap);
 440                                 new->private = kvm;
 441                                 kvm->arch.gmap = new;
 442                                 ret = 0;
 443                         }
 444                 }
 445                 mutex_unlock(&kvm->lock);
 446                 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
 447                 break;
 448         }
 449         default:
 450                 ret = -ENXIO;
 451                 break;
 452         }
 453         return ret;
 454 }
 455
 456 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 457
 458 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 459 {
 460         struct kvm_vcpu *vcpu;
 461         int i;
 462
 463         if (!test_kvm_facility(kvm, 76))
 464                 return -EINVAL;
 465
 466         mutex_lock(&kvm->lock);
 467         switch (attr->attr) {
 468         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 469                 get_random_bytes(
 470                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 471                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 472                 kvm->arch.crypto.aes_kw = 1;
 473                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 474                 break;
 475         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 476                 get_random_bytes(
 477                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 478                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 479                 kvm->arch.crypto.dea_kw = 1;
 480                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 481                 break;
 482         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 483                 kvm->arch.crypto.aes_kw = 0;
 484                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 485                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 486                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 487                 break;
 488         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 489                 kvm->arch.crypto.dea_kw = 0;
 490                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 491                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 492                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 493                 break;
 494         default:
 495                 mutex_unlock(&kvm->lock);
 496                 return -ENXIO;
 497         }
 498
 499         kvm_for_each_vcpu(i, vcpu, kvm) {
 500                 kvm_s390_vcpu_crypto_setup(vcpu);
 501                 exit_sie(vcpu);
 502         }
 503         mutex_unlock(&kvm->lock);
 504         return 0;
 505 }
 506
 507 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 508 {
 509         u8 gtod_high;
 510
 511         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 512                                            sizeof(gtod_high)))
 513                 return -EFAULT;
 514
 515         if (gtod_high != 0)
 516                 return -EINVAL;
 517         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 518
 519         return 0;
 520 }
 521
 522 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 523 {
 524         u64 gtod;
 525
 526         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 527                 return -EFAULT;
 528
 529         kvm_s390_set_tod_clock(kvm, gtod);
 530         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 531         return 0;
 532 }
 533
 534 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 535 {
 536         int ret;
 537
 538         if (attr->flags)
 539                 return -EINVAL;
 540
 541         switch (attr->attr) {
 542         case KVM_S390_VM_TOD_HIGH:
 543                 ret = kvm_s390_set_tod_high(kvm, attr);
 544                 break;
 545         case KVM_S390_VM_TOD_LOW:
 546                 ret = kvm_s390_set_tod_low(kvm, attr);
 547                 break;
 548         default:
 549                 ret = -ENXIO;
 550                 break;
 551         }
 552         return ret;
 553 }
 554
 555 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 556 {
 557         u8 gtod_high = 0;
 558
 559         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 560                                          sizeof(gtod_high)))
 561                 return -EFAULT;
 562         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 563
 564         return 0;
 565 }
 566
 567 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 568 {
 569         u64 gtod;
 570
 571         gtod = kvm_s390_get_tod_clock_fast(kvm);
 572         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 573                 return -EFAULT;
 574         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 575
 576         return 0;
 577 }
 578
 579 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 580 {
 581         int ret;
 582
 583         if (attr->flags)
 584                 return -EINVAL;
 585
 586         switch (attr->attr) {
 587         case KVM_S390_VM_TOD_HIGH:
 588                 ret = kvm_s390_get_tod_high(kvm, attr);
 589                 break;
 590         case KVM_S390_VM_TOD_LOW:
 591                 ret = kvm_s390_get_tod_low(kvm, attr);
 592                 break;
 593         default:
 594                 ret = -ENXIO;
 595                 break;
 596         }
 597         return ret;
 598 }
 599
 600 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 601 {
 602         struct kvm_s390_vm_cpu_processor *proc;
 603         int ret = 0;
 604
 605         mutex_lock(&kvm->lock);
 606         if (atomic_read(&kvm->online_vcpus)) {
 607                 ret = -EBUSY;
 608                 goto out;
 609         }
 610         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 611         if (!proc) {
 612                 ret = -ENOMEM;
 613                 goto out;
 614         }
 615         if (!copy_from_user(proc, (void __user *)attr->addr,
 616                             sizeof(*proc))) {
 617                 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
 618                        sizeof(struct cpuid));
 619                 kvm->arch.model.ibc = proc->ibc;
 620                 memcpy(kvm->arch.model.fac->list, proc->fac_list,
 621                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 622         } else
 623                 ret = -EFAULT;
 624         kfree(proc);
 625 out:
 626         mutex_unlock(&kvm->lock);
 627         return ret;
 628 }
 629
 630 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 631 {
 632         int ret = -ENXIO;
 633
 634         switch (attr->attr) {
 635         case KVM_S390_VM_CPU_PROCESSOR:
 636                 ret = kvm_s390_set_processor(kvm, attr);
 637                 break;
 638         }
 639         return ret;
 640 }
 641
 642 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 643 {
 644         struct kvm_s390_vm_cpu_processor *proc;
 645         int ret = 0;
 646
 647         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 648         if (!proc) {
 649                 ret = -ENOMEM;
 650                 goto out;
 651         }
 652         memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
 653         proc->ibc = kvm->arch.model.ibc;
 654         memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
 655         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 656                 ret = -EFAULT;
 657         kfree(proc);
 658 out:
 659         return ret;
 660 }
 661
 662 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 663 {
 664         struct kvm_s390_vm_cpu_machine *mach;
 665         int ret = 0;
 666
 667         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 668         if (!mach) {
 669                 ret = -ENOMEM;
 670                 goto out;
 671         }
 672         get_cpu_id((struct cpuid *) &mach->cpuid);
 673         mach->ibc = sclp.ibc;
 674         memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
 675                S390_ARCH_FAC_LIST_SIZE_BYTE);
 676         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 677                S390_ARCH_FAC_LIST_SIZE_BYTE);
 678         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 679                 ret = -EFAULT;
 680         kfree(mach);
 681 out:
 682         return ret;
 683 }
 684
 685 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 686 {
 687         int ret = -ENXIO;
 688
 689         switch (attr->attr) {
 690         case KVM_S390_VM_CPU_PROCESSOR:
 691                 ret = kvm_s390_get_processor(kvm, attr);
 692                 break;
 693         case KVM_S390_VM_CPU_MACHINE:
 694                 ret = kvm_s390_get_machine(kvm, attr);
 695                 break;
 696         }
 697         return ret;
 698 }
 699
 700 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 701 {
 702         int ret;
 703
 704         switch (attr->group) {
 705         case KVM_S390_VM_MEM_CTRL:
 706                 ret = kvm_s390_set_mem_control(kvm, attr);
 707                 break;
 708         case KVM_S390_VM_TOD:
 709                 ret = kvm_s390_set_tod(kvm, attr);
 710                 break;
 711         case KVM_S390_VM_CPU_MODEL:
 712                 ret = kvm_s390_set_cpu_model(kvm, attr);
 713                 break;
 714         case KVM_S390_VM_CRYPTO:
 715                 ret = kvm_s390_vm_set_crypto(kvm, attr);
 716                 break;
 717         default:
 718                 ret = -ENXIO;
 719                 break;
 720         }
 721
 722         return ret;
 723 }
 724
 725 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 726 {
 727         int ret;
 728
 729         switch (attr->group) {
 730         case KVM_S390_VM_MEM_CTRL:
 731                 ret = kvm_s390_get_mem_control(kvm, attr);
 732                 break;
 733         case KVM_S390_VM_TOD:
 734                 ret = kvm_s390_get_tod(kvm, attr);
 735                 break;
 736         case KVM_S390_VM_CPU_MODEL:
 737                 ret = kvm_s390_get_cpu_model(kvm, attr);
 738                 break;
 739         default:
 740                 ret = -ENXIO;
 741                 break;
 742         }
 743
 744         return ret;
 745 }
 746
 747 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 748 {
 749         int ret;
 750
 751         switch (attr->group) {
 752         case KVM_S390_VM_MEM_CTRL:
 753                 switch (attr->attr) {
 754                 case KVM_S390_VM_MEM_ENABLE_CMMA:
 755                 case KVM_S390_VM_MEM_CLR_CMMA:
 756                 case KVM_S390_VM_MEM_LIMIT_SIZE:
 757                         ret = 0;
 758                         break;
 759                 default:
 760                         ret = -ENXIO;
 761                         break;
 762                 }
 763                 break;
 764         case KVM_S390_VM_TOD:
 765                 switch (attr->attr) {
 766                 case KVM_S390_VM_TOD_LOW:
 767                 case KVM_S390_VM_TOD_HIGH:
 768                         ret = 0;
 769                         break;
 770                 default:
 771                         ret = -ENXIO;
 772                         break;
 773                 }
 774                 break;
 775         case KVM_S390_VM_CPU_MODEL:
 776                 switch (attr->attr) {
 777                 case KVM_S390_VM_CPU_PROCESSOR:
 778                 case KVM_S390_VM_CPU_MACHINE:
 779                         ret = 0;
 780                         break;
 781                 default:
 782                         ret = -ENXIO;
 783                         break;
 784                 }
 785                 break;
 786         case KVM_S390_VM_CRYPTO:
 787                 switch (attr->attr) {
 788                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 789                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 790                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 791                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 792                         ret = 0;
 793                         break;
 794                 default:
 795                         ret = -ENXIO;
 796                         break;
 797                 }
 798                 break;
 799         default:
 800                 ret = -ENXIO;
 801                 break;
 802         }
 803
 804         return ret;
 805 }
 806
 807 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 808 {
 809         uint8_t *keys;
 810         uint64_t hva;
 811         unsigned long curkey;
 812         int i, r = 0;
 813
 814         if (args->flags != 0)
 815                 return -EINVAL;
 816
 817         /* Is this guest using storage keys? */
 818         if (!mm_use_skey(current->mm))
 819                 return KVM_S390_GET_SKEYS_NONE;
 820
 821         /* Enforce sane limit on memory allocation */
 822         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
 823                 return -EINVAL;
 824
 825         keys = kmalloc_array(args->count, sizeof(uint8_t),
 826                              GFP_KERNEL | __GFP_NOWARN);
 827         if (!keys)
 828                 keys = vmalloc(sizeof(uint8_t) * args->count);
 829         if (!keys)
 830                 return -ENOMEM;
 831
 832         for (i = 0; i < args->count; i++) {
 833                 hva = gfn_to_hva(kvm, args->start_gfn + i);
 834                 if (kvm_is_error_hva(hva)) {
 835                         r = -EFAULT;
 836                         goto out;
 837                 }
 838
 839                 curkey = get_guest_storage_key(current->mm, hva);
 840                 if (IS_ERR_VALUE(curkey)) {
 841                         r = curkey;
 842                         goto out;
 843                 }
 844                 keys[i] = curkey;
 845         }
 846
 847         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
 848                          sizeof(uint8_t) * args->count);
 849         if (r)
 850                 r = -EFAULT;
 851 out:
 852         kvfree(keys);
 853         return r;
 854 }
 855
 856 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 857 {
 858         uint8_t *keys;
 859         uint64_t hva;
 860         int i, r = 0;
 861
 862         if (args->flags != 0)
 863                 return -EINVAL;
 864
 865         /* Enforce sane limit on memory allocation */
 866         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
 867                 return -EINVAL;
 868
 869         keys = kmalloc_array(args->count, sizeof(uint8_t),
 870                              GFP_KERNEL | __GFP_NOWARN);
 871         if (!keys)
 872                 keys = vmalloc(sizeof(uint8_t) * args->count);
 873         if (!keys)
 874                 return -ENOMEM;
 875
 876         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
 877                            sizeof(uint8_t) * args->count);
 878         if (r) {
 879                 r = -EFAULT;
 880                 goto out;
 881         }
 882
 883         /* Enable storage key handling for the guest */
 884         r = s390_enable_skey();
 885         if (r)
 886                 goto out;
 887
 888         for (i = 0; i < args->count; i++) {
 889                 hva = gfn_to_hva(kvm, args->start_gfn + i);
 890                 if (kvm_is_error_hva(hva)) {
 891                         r = -EFAULT;
 892                         goto out;
 893                 }
 894
 895                 /* Lowest order bit is reserved */
 896                 if (keys[i] & 0x01) {
 897                         r = -EINVAL;
 898                         goto out;
 899                 }
 900
 901                 r = set_guest_storage_key(current->mm, hva,
 902                                           (unsigned long)keys[i], 0);
 903                 if (r)
 904                         goto out;
 905         }
 906 out:
 907         kvfree(keys);
 908         return r;
 909 }
 910
 911 long kvm_arch_vm_ioctl(struct file *filp,
 912                        unsigned int ioctl, unsigned long arg)
 913 {
 914         struct kvm *kvm = filp->private_data;
 915         void __user *argp = (void __user *)arg;
 916         struct kvm_device_attr attr;
 917         int r;
 918
 919         switch (ioctl) {
 920         case KVM_S390_INTERRUPT: {
 921                 struct kvm_s390_interrupt s390int;
 922
 923                 r = -EFAULT;
 924                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
 925                         break;
 926                 r = kvm_s390_inject_vm(kvm, &s390int);
 927                 break;
 928         }
 929         case KVM_ENABLE_CAP: {
 930                 struct kvm_enable_cap cap;
 931                 r = -EFAULT;
 932                 if (copy_from_user(&cap, argp, sizeof(cap)))
 933                         break;
 934                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 935                 break;
 936         }
 937         case KVM_CREATE_IRQCHIP: {
 938                 struct kvm_irq_routing_entry routing;
 939
 940                 r = -EINVAL;
 941                 if (kvm->arch.use_irqchip) {
 942                         /* Set up dummy routing. */
 943                         memset(&routing, 0, sizeof(routing));
 944                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
 945                 }
 946                 break;
 947         }
 948         case KVM_SET_DEVICE_ATTR: {
 949                 r = -EFAULT;
 950                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 951                         break;
 952                 r = kvm_s390_vm_set_attr(kvm, &attr);
 953                 break;
 954         }
 955         case KVM_GET_DEVICE_ATTR: {
 956                 r = -EFAULT;
 957                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 958                         break;
 959                 r = kvm_s390_vm_get_attr(kvm, &attr);
 960                 break;
 961         }
 962         case KVM_HAS_DEVICE_ATTR: {
 963                 r = -EFAULT;
 964                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
 965                         break;
 966                 r = kvm_s390_vm_has_attr(kvm, &attr);
 967                 break;
 968         }
 969         case KVM_S390_GET_SKEYS: {
 970                 struct kvm_s390_skeys args;
 971
 972                 r = -EFAULT;
 973                 if (copy_from_user(&args, argp,
 974                                    sizeof(struct kvm_s390_skeys)))
 975                         break;
 976                 r = kvm_s390_get_skeys(kvm, &args);
 977                 break;
 978         }
 979         case KVM_S390_SET_SKEYS: {
 980                 struct kvm_s390_skeys args;
 981
 982                 r = -EFAULT;
 983                 if (copy_from_user(&args, argp,
 984                                    sizeof(struct kvm_s390_skeys)))
 985                         break;
 986                 r = kvm_s390_set_skeys(kvm, &args);
 987                 break;
 988         }
 989         default:
 990                 r = -ENOTTY;
 991         }
 992
 993         return r;
 994 }
 995
 996 static int kvm_s390_query_ap_config(u8 *config)
 997 {
 998         u32 fcn_code = 0x04000000UL;
 999         u32 cc = 0;
1000
1001         memset(config, 0, 128);
1002         asm volatile(
1003                 "lgr 0,%1\n"
1004                 "lgr 2,%2\n"
1005                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1006                 "0: ipm %0\n"
1007                 "srl %0,28\n"
1008                 "1:\n"
1009                 EX_TABLE(0b, 1b)
1010                 : "+r" (cc)
1011                 : "r" (fcn_code), "r" (config)
1012                 : "cc", "0", "2", "memory"
1013         );
1014
1015         return cc;
1016 }
1017
1018 static int kvm_s390_apxa_installed(void)
1019 {
1020         u8 config[128];
1021         int cc;
1022
1023         if (test_facility(2) && test_facility(12)) {
1024                 cc = kvm_s390_query_ap_config(config);
1025
1026                 if (cc)
1027                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1028                 else
1029                         return config[0] & 0x40;
1030         }
1031
1032         return 0;
1033 }
1034
1035 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1036 {
1037         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1038
1039         if (kvm_s390_apxa_installed())
1040                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1041         else
1042                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1043 }
1044
1045 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1046 {
1047         get_cpu_id(cpu_id);
1048         cpu_id->version = 0xff;
1049 }
1050
1051 static int kvm_s390_crypto_init(struct kvm *kvm)
1052 {
1053         if (!test_kvm_facility(kvm, 76))
1054                 return 0;
1055
1056         kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1057                                          GFP_KERNEL | GFP_DMA);
1058         if (!kvm->arch.crypto.crycb)
1059                 return -ENOMEM;
1060
1061         kvm_s390_set_crycb_format(kvm);
1062
1063         /* Enable AES/DEA protected key functions by default */
1064         kvm->arch.crypto.aes_kw = 1;
1065         kvm->arch.crypto.dea_kw = 1;
1066         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1067                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1068         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1069                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1070
1071         return 0;
1072 }
1073
1074 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1075 {
1076         int i, rc;
1077         char debug_name[16];
1078         static unsigned long sca_offset;
1079
1080         rc = -EINVAL;
1081 #ifdef CONFIG_KVM_S390_UCONTROL
1082         if (type & ~KVM_VM_S390_UCONTROL)
1083                 goto out_err;
1084         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1085                 goto out_err;
1086 #else
1087         if (type)
1088                 goto out_err;
1089 #endif
1090
1091         rc = s390_enable_sie();
1092         if (rc)
1093                 goto out_err;
1094
1095         rc = -ENOMEM;
1096
1097         kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1098         if (!kvm->arch.sca)
1099                 goto out_err;
1100         spin_lock(&kvm_lock);
1101         sca_offset += 16;
1102         if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
1103                 sca_offset = 0;
1104         kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1105         spin_unlock(&kvm_lock);
1106
1107         sprintf(debug_name, "kvm-%u", current->pid);
1108
1109         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1110         if (!kvm->arch.dbf)
1111                 goto out_err;
1112
1113         /*
1114          * The architectural maximum amount of facilities is 16 kbit. To store
1115          * this amount, 2 kbyte of memory is required. Thus we need a full
1116          * page to hold the guest facility list (arch.model.fac->list) and the
1117          * facility mask (arch.model.fac->mask). Its address size has to be
1118          * 31 bits and word aligned.
1119          */
1120         kvm->arch.model.fac =
1121                 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1122         if (!kvm->arch.model.fac)
1123                 goto out_err;
1124
1125         /* Populate the facility mask initially. */
1126         memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1127                S390_ARCH_FAC_LIST_SIZE_BYTE);
1128         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1129                 if (i < kvm_s390_fac_list_mask_size())
1130                         kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1131                 else
1132                         kvm->arch.model.fac->mask[i] = 0UL;
1133         }
1134
1135         /* Populate the facility list initially. */
1136         memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1137                S390_ARCH_FAC_LIST_SIZE_BYTE);
1138
1139         kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1140         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1141
1142         if (kvm_s390_crypto_init(kvm) < 0)
1143                 goto out_err;
1144
1145         spin_lock_init(&kvm->arch.float_int.lock);
1146         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1147                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1148         init_waitqueue_head(&kvm->arch.ipte_wq);
1149         mutex_init(&kvm->arch.ipte_mutex);
1150
1151         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1152         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1153
1154         if (type & KVM_VM_S390_UCONTROL) {
1155                 kvm->arch.gmap = NULL;
1156         } else {
1157                 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1158                 if (!kvm->arch.gmap)
1159                         goto out_err;
1160                 kvm->arch.gmap->private = kvm;
1161                 kvm->arch.gmap->pfault_enabled = 0;
1162         }
1163
1164         kvm->arch.css_support = 0;
1165         kvm->arch.use_irqchip = 0;
1166         kvm->arch.epoch = 0;
1167
1168         spin_lock_init(&kvm->arch.start_stop_lock);
1169         KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1170
1171         return 0;
1172 out_err:
1173         kfree(kvm->arch.crypto.crycb);
1174         free_page((unsigned long)kvm->arch.model.fac);
1175         debug_unregister(kvm->arch.dbf);
1176         free_page((unsigned long)(kvm->arch.sca));
1177         KVM_EVENT(3, "creation of vm failed: %d", rc);
1178         return rc;
1179 }
1180
1181 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1182 {
1183         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1184         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1185         kvm_s390_clear_local_irqs(vcpu);
1186         kvm_clear_async_pf_completion_queue(vcpu);
1187         if (!kvm_is_ucontrol(vcpu->kvm)) {
1188                 clear_bit(63 - vcpu->vcpu_id,
1189                           (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1190                 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1191                     (__u64) vcpu->arch.sie_block)
1192                         vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1193         }
1194         smp_mb();
1195
1196         if (kvm_is_ucontrol(vcpu->kvm))
1197                 gmap_free(vcpu->arch.gmap);
1198
1199         if (vcpu->kvm->arch.use_cmma)
1200                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1201         free_page((unsigned long)(vcpu->arch.sie_block));
1202
1203         kvm_vcpu_uninit(vcpu);
1204         kmem_cache_free(kvm_vcpu_cache, vcpu);
1205 }
1206
1207 static void kvm_free_vcpus(struct kvm *kvm)
1208 {
1209         unsigned int i;
1210         struct kvm_vcpu *vcpu;
1211
1212         kvm_for_each_vcpu(i, vcpu, kvm)
1213                 kvm_arch_vcpu_destroy(vcpu);
1214
1215         mutex_lock(&kvm->lock);
1216         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1217                 kvm->vcpus[i] = NULL;
1218
1219         atomic_set(&kvm->online_vcpus, 0);
1220         mutex_unlock(&kvm->lock);
1221 }
1222
1223 void kvm_arch_destroy_vm(struct kvm *kvm)
1224 {
1225         kvm_free_vcpus(kvm);
1226         free_page((unsigned long)kvm->arch.model.fac);
1227         free_page((unsigned long)(kvm->arch.sca));
1228         debug_unregister(kvm->arch.dbf);
1229         kfree(kvm->arch.crypto.crycb);
1230         if (!kvm_is_ucontrol(kvm))
1231                 gmap_free(kvm->arch.gmap);
1232         kvm_s390_destroy_adapters(kvm);
1233         kvm_s390_clear_float_irqs(kvm);
1234         KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1235 }
1236
1237 /* Section: vcpu related */
1238 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1239 {
1240         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1241         if (!vcpu->arch.gmap)
1242                 return -ENOMEM;
1243         vcpu->arch.gmap->private = vcpu->kvm;
1244
1245         return 0;
1246 }
1247
1248 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1249 {
1250         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1251         kvm_clear_async_pf_completion_queue(vcpu);
1252         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1253                                     KVM_SYNC_GPRS |
1254                                     KVM_SYNC_ACRS |
1255                                     KVM_SYNC_CRS |
1256                                     KVM_SYNC_ARCH0 |
1257                                     KVM_SYNC_PFAULT;
1258         if (test_kvm_facility(vcpu->kvm, 129))
1259                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1260
1261         if (kvm_is_ucontrol(vcpu->kvm))
1262                 return __kvm_ucontrol_vcpu_init(vcpu);
1263
1264         return 0;
1265 }
1266
1267 /*
1268  * Backs up the current FP/VX register save area on a particular
1269  * destination.  Used to switch between different register save
1270  * areas.
1271  */
1272 static inline void save_fpu_to(struct fpu *dst)
1273 {
1274         dst->fpc = current->thread.fpu.fpc;
1275         dst->flags = current->thread.fpu.flags;
1276         dst->regs = current->thread.fpu.regs;
1277 }
1278
1279 /*
1280  * Switches the FP/VX register save area from which to lazy
1281  * restore register contents.
1282  */
1283 static inline void load_fpu_from(struct fpu *from)
1284 {
1285         current->thread.fpu.fpc = from->fpc;
1286         current->thread.fpu.flags = from->flags;
1287         current->thread.fpu.regs = from->regs;
1288 }
1289
1290 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1291 {
1292         /* Save host register state */
1293         save_fpu_regs();
1294         save_fpu_to(&vcpu->arch.host_fpregs);
1295
1296         if (test_kvm_facility(vcpu->kvm, 129)) {
1297                 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1298                 current->thread.fpu.flags = FPU_USE_VX;
1299                 /*
1300                  * Use the register save area in the SIE-control block
1301                  * for register restore and save in kvm_arch_vcpu_put()
1302                  */
1303                 current->thread.fpu.vxrs =
1304                         (__vector128 *)&vcpu->run->s.regs.vrs;
1305                 /* Always enable the vector extension for KVM */
1306                 __ctl_set_vx();
1307         } else
1308                 load_fpu_from(&vcpu->arch.guest_fpregs);
1309
1310         if (test_fp_ctl(current->thread.fpu.fpc))
1311                 /* User space provided an invalid FPC, let's clear it */
1312                 current->thread.fpu.fpc = 0;
1313
1314         save_access_regs(vcpu->arch.host_acrs);
1315         restore_access_regs(vcpu->run->s.regs.acrs);
1316         gmap_enable(vcpu->arch.gmap);
1317         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1318 }
1319
1320 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1321 {
1322         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1323         gmap_disable(vcpu->arch.gmap);
1324
1325         save_fpu_regs();
1326
1327         if (test_kvm_facility(vcpu->kvm, 129))
1328                 /*
1329                  * kvm_arch_vcpu_load() set up the register save area to
1330                  * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1331                  * are already saved.  Only the floating-point control must be
1332                  * copied.
1333                  */
1334                 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1335         else
1336                 save_fpu_to(&vcpu->arch.guest_fpregs);
1337         load_fpu_from(&vcpu->arch.host_fpregs);
1338
1339         save_access_regs(vcpu->run->s.regs.acrs);
1340         restore_access_regs(vcpu->arch.host_acrs);
1341 }
1342
1343 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1344 {
1345         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1346         vcpu->arch.sie_block->gpsw.mask = 0UL;
1347         vcpu->arch.sie_block->gpsw.addr = 0UL;
1348         kvm_s390_set_prefix(vcpu, 0);
1349         vcpu->arch.sie_block->cputm     = 0UL;
1350         vcpu->arch.sie_block->ckc       = 0UL;
1351         vcpu->arch.sie_block->todpr     = 0;
1352         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1353         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1354         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1355         vcpu->arch.guest_fpregs.fpc = 0;
1356         asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1357         vcpu->arch.sie_block->gbea = 1;
1358         vcpu->arch.sie_block->pp = 0;
1359         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1360         kvm_clear_async_pf_completion_queue(vcpu);
1361         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1362                 kvm_s390_vcpu_stop(vcpu);
1363         kvm_s390_clear_local_irqs(vcpu);
1364 }
1365
1366 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1367 {
1368         mutex_lock(&vcpu->kvm->lock);
1369         preempt_disable();
1370         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1371         preempt_enable();
1372         mutex_unlock(&vcpu->kvm->lock);
1373         if (!kvm_is_ucontrol(vcpu->kvm))
1374                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1375 }
1376
1377 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1378 {
1379         if (!test_kvm_facility(vcpu->kvm, 76))
1380                 return;
1381
1382         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1383
1384         if (vcpu->kvm->arch.crypto.aes_kw)
1385                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1386         if (vcpu->kvm->arch.crypto.dea_kw)
1387                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1388
1389         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1390 }
1391
1392 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1393 {
1394         free_page(vcpu->arch.sie_block->cbrlo);
1395         vcpu->arch.sie_block->cbrlo = 0;
1396 }
1397
1398 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1399 {
1400         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1401         if (!vcpu->arch.sie_block->cbrlo)
1402                 return -ENOMEM;
1403
1404         vcpu->arch.sie_block->ecb2 |= 0x80;
1405         vcpu->arch.sie_block->ecb2 &= ~0x08;
1406         return 0;
1407 }
1408
1409 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1410 {
1411         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1412
1413         vcpu->arch.cpu_id = model->cpu_id;
1414         vcpu->arch.sie_block->ibc = model->ibc;
1415         vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1416 }
1417
1418 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1419 {
1420         int rc = 0;
1421
1422         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1423                                                     CPUSTAT_SM |
1424                                                     CPUSTAT_STOPPED);
1425
1426         if (test_kvm_facility(vcpu->kvm, 78))
1427                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1428         else if (test_kvm_facility(vcpu->kvm, 8))
1429                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1430
1431         kvm_s390_vcpu_setup_model(vcpu);
1432
1433         vcpu->arch.sie_block->ecb   = 6;
1434         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1435                 vcpu->arch.sie_block->ecb |= 0x10;
1436
1437         vcpu->arch.sie_block->ecb2  = 8;
1438         vcpu->arch.sie_block->eca   = 0xC1002000U;
1439         if (sclp.has_siif)
1440                 vcpu->arch.sie_block->eca |= 1;
1441         if (sclp.has_sigpif)
1442                 vcpu->arch.sie_block->eca |= 0x10000000U;
1443         if (test_kvm_facility(vcpu->kvm, 129)) {
1444                 vcpu->arch.sie_block->eca |= 0x00020000;
1445                 vcpu->arch.sie_block->ecd |= 0x20000000;
1446         }
1447         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1448
1449         if (vcpu->kvm->arch.use_cmma) {
1450                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1451                 if (rc)
1452                         return rc;
1453         }
1454         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1455         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1456
1457         kvm_s390_vcpu_crypto_setup(vcpu);
1458
1459         return rc;
1460 }
1461
1462 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1463                                       unsigned int id)
1464 {
1465         struct kvm_vcpu *vcpu;
1466         struct sie_page *sie_page;
1467         int rc = -EINVAL;
1468
1469         if (id >= KVM_MAX_VCPUS)
1470                 goto out;
1471
1472         rc = -ENOMEM;
1473
1474         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1475         if (!vcpu)
1476                 goto out;
1477
1478         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1479         if (!sie_page)
1480                 goto out_free_cpu;
1481
1482         vcpu->arch.sie_block = &sie_page->sie_block;
1483         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1484
1485         vcpu->arch.sie_block->icpua = id;
1486         if (!kvm_is_ucontrol(kvm)) {
1487                 if (!kvm->arch.sca) {
1488                         WARN_ON_ONCE(1);
1489                         goto out_free_cpu;
1490                 }
1491                 if (!kvm->arch.sca->cpu[id].sda)
1492                         kvm->arch.sca->cpu[id].sda =
1493                                 (__u64) vcpu->arch.sie_block;
1494                 vcpu->arch.sie_block->scaoh =
1495                         (__u32)(((__u64)kvm->arch.sca) >> 32);
1496                 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1497                 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1498         }
1499
1500         spin_lock_init(&vcpu->arch.local_int.lock);
1501         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1502         vcpu->arch.local_int.wq = &vcpu->wq;
1503         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1504
1505         /*
1506          * Allocate a save area for floating-point registers.  If the vector
1507          * extension is available, register contents are saved in the SIE
1508          * control block.  The allocated save area is still required in
1509          * particular places, for example, in kvm_s390_vcpu_store_status().
1510          */
1511         vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1512                                                GFP_KERNEL);
1513         if (!vcpu->arch.guest_fpregs.fprs) {
1514                 rc = -ENOMEM;
1515                 goto out_free_sie_block;
1516         }
1517
1518         rc = kvm_vcpu_init(vcpu, kvm, id);
1519         if (rc)
1520                 goto out_free_sie_block;
1521         VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1522                  vcpu->arch.sie_block);
1523         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1524
1525         return vcpu;
1526 out_free_sie_block:
1527         free_page((unsigned long)(vcpu->arch.sie_block));
1528 out_free_cpu:
1529         kmem_cache_free(kvm_vcpu_cache, vcpu);
1530 out:
1531         return ERR_PTR(rc);
1532 }
1533
1534 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1535 {
1536         return kvm_s390_vcpu_has_irq(vcpu, 0);
1537 }
1538
1539 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1540 {
1541         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1542         exit_sie(vcpu);
1543 }
1544
1545 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1546 {
1547         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1548 }
1549
1550 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1551 {
1552         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1553         exit_sie(vcpu);
1554 }
1555
1556 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1557 {
1558         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1559 }
1560
1561 /*
1562  * Kick a guest cpu out of SIE and wait until SIE is not running.
1563  * If the CPU is not running (e.g. waiting as idle) the function will
1564  * return immediately. */
1565 void exit_sie(struct kvm_vcpu *vcpu)
1566 {
1567         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1568         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1569                 cpu_relax();
1570 }
1571
1572 /* Kick a guest cpu out of SIE to process a request synchronously */
1573 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1574 {
1575         kvm_make_request(req, vcpu);
1576         kvm_s390_vcpu_request(vcpu);
1577 }
1578
1579 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1580 {
1581         int i;
1582         struct kvm *kvm = gmap->private;
1583         struct kvm_vcpu *vcpu;
1584
1585         kvm_for_each_vcpu(i, vcpu, kvm) {
1586                 /* match against both prefix pages */
1587                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1588                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1589                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1590                 }
1591         }
1592 }
1593
1594 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1595 {
1596         /* kvm common code refers to this, but never calls it */
1597         BUG();
1598         return 0;
1599 }
1600
1601 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1602                                            struct kvm_one_reg *reg)
1603 {
1604         int r = -EINVAL;
1605
1606         switch (reg->id) {
1607         case KVM_REG_S390_TODPR:
1608                 r = put_user(vcpu->arch.sie_block->todpr,
1609                              (u32 __user *)reg->addr);
1610                 break;
1611         case KVM_REG_S390_EPOCHDIFF:
1612                 r = put_user(vcpu->arch.sie_block->epoch,
1613                              (u64 __user *)reg->addr);
1614                 break;
1615         case KVM_REG_S390_CPU_TIMER:
1616                 r = put_user(vcpu->arch.sie_block->cputm,
1617                              (u64 __user *)reg->addr);
1618                 break;
1619         case KVM_REG_S390_CLOCK_COMP:
1620                 r = put_user(vcpu->arch.sie_block->ckc,
1621                              (u64 __user *)reg->addr);
1622                 break;
1623         case KVM_REG_S390_PFTOKEN:
1624                 r = put_user(vcpu->arch.pfault_token,
1625                              (u64 __user *)reg->addr);
1626                 break;
1627         case KVM_REG_S390_PFCOMPARE:
1628                 r = put_user(vcpu->arch.pfault_compare,
1629                              (u64 __user *)reg->addr);
1630                 break;
1631         case KVM_REG_S390_PFSELECT:
1632                 r = put_user(vcpu->arch.pfault_select,
1633                              (u64 __user *)reg->addr);
1634                 break;
1635         case KVM_REG_S390_PP:
1636                 r = put_user(vcpu->arch.sie_block->pp,
1637                              (u64 __user *)reg->addr);
1638                 break;
1639         case KVM_REG_S390_GBEA:
1640                 r = put_user(vcpu->arch.sie_block->gbea,
1641                              (u64 __user *)reg->addr);
1642                 break;
1643         default:
1644                 break;
1645         }
1646
1647         return r;
1648 }
1649
1650 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1651                                            struct kvm_one_reg *reg)
1652 {
1653         int r = -EINVAL;
1654
1655         switch (reg->id) {
1656         case KVM_REG_S390_TODPR:
1657                 r = get_user(vcpu->arch.sie_block->todpr,
1658                              (u32 __user *)reg->addr);
1659                 break;
1660         case KVM_REG_S390_EPOCHDIFF:
1661                 r = get_user(vcpu->arch.sie_block->epoch,
1662                              (u64 __user *)reg->addr);
1663                 break;
1664         case KVM_REG_S390_CPU_TIMER:
1665                 r = get_user(vcpu->arch.sie_block->cputm,
1666                              (u64 __user *)reg->addr);
1667                 break;
1668         case KVM_REG_S390_CLOCK_COMP:
1669                 r = get_user(vcpu->arch.sie_block->ckc,
1670                              (u64 __user *)reg->addr);
1671                 break;
1672         case KVM_REG_S390_PFTOKEN:
1673                 r = get_user(vcpu->arch.pfault_token,
1674                              (u64 __user *)reg->addr);
1675                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1676                         kvm_clear_async_pf_completion_queue(vcpu);
1677                 break;
1678         case KVM_REG_S390_PFCOMPARE:
1679                 r = get_user(vcpu->arch.pfault_compare,
1680                              (u64 __user *)reg->addr);
1681                 break;
1682         case KVM_REG_S390_PFSELECT:
1683                 r = get_user(vcpu->arch.pfault_select,
1684                              (u64 __user *)reg->addr);
1685                 break;
1686         case KVM_REG_S390_PP:
1687                 r = get_user(vcpu->arch.sie_block->pp,
1688                              (u64 __user *)reg->addr);
1689                 break;
1690         case KVM_REG_S390_GBEA:
1691                 r = get_user(vcpu->arch.sie_block->gbea,
1692                              (u64 __user *)reg->addr);
1693                 break;
1694         default:
1695                 break;
1696         }
1697
1698         return r;
1699 }
1700
1701 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1702 {
1703         kvm_s390_vcpu_initial_reset(vcpu);
1704         return 0;
1705 }
1706
1707 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1708 {
1709         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1710         return 0;
1711 }
1712
1713 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1714 {
1715         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1716         return 0;
1717 }
1718
1719 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1720                                   struct kvm_sregs *sregs)
1721 {
1722         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1723         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1724         restore_access_regs(vcpu->run->s.regs.acrs);
1725         return 0;
1726 }
1727
1728 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1729                                   struct kvm_sregs *sregs)
1730 {
1731         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1732         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1733         return 0;
1734 }
1735
1736 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1737 {
1738         if (test_fp_ctl(fpu->fpc))
1739                 return -EINVAL;
1740         memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1741         vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1742         save_fpu_regs();
1743         load_fpu_from(&vcpu->arch.guest_fpregs);
1744         return 0;
1745 }
1746
1747 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1748 {
1749         memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1750         fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1751         return 0;
1752 }
1753
1754 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1755 {
1756         int rc = 0;
1757
1758         if (!is_vcpu_stopped(vcpu))
1759                 rc = -EBUSY;
1760         else {
1761                 vcpu->run->psw_mask = psw.mask;
1762                 vcpu->run->psw_addr = psw.addr;
1763         }
1764         return rc;
1765 }
1766
1767 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1768                                   struct kvm_translation *tr)
1769 {
1770         return -EINVAL; /* not implemented yet */
1771 }
1772
1773 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1774                               KVM_GUESTDBG_USE_HW_BP | \
1775                               KVM_GUESTDBG_ENABLE)
1776
1777 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1778                                         struct kvm_guest_debug *dbg)
1779 {
1780         int rc = 0;
1781
1782         vcpu->guest_debug = 0;
1783         kvm_s390_clear_bp_data(vcpu);
1784
1785         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1786                 return -EINVAL;
1787
1788         if (dbg->control & KVM_GUESTDBG_ENABLE) {
1789                 vcpu->guest_debug = dbg->control;
1790                 /* enforce guest PER */
1791                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1792
1793                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1794                         rc = kvm_s390_import_bp_data(vcpu, dbg);
1795         } else {
1796                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1797                 vcpu->arch.guestdbg.last_bp = 0;
1798         }
1799
1800         if (rc) {
1801                 vcpu->guest_debug = 0;
1802                 kvm_s390_clear_bp_data(vcpu);
1803                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1804         }
1805
1806         return rc;
1807 }
1808
1809 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1810                                     struct kvm_mp_state *mp_state)
1811 {
1812         /* CHECK_STOP and LOAD are not supported yet */
1813         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1814                                        KVM_MP_STATE_OPERATING;
1815 }
1816
1817 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1818                                     struct kvm_mp_state *mp_state)
1819 {
1820         int rc = 0;
1821
1822         /* user space knows about this interface - let it control the state */
1823         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1824
1825         switch (mp_state->mp_state) {
1826         case KVM_MP_STATE_STOPPED:
1827                 kvm_s390_vcpu_stop(vcpu);
1828                 break;
1829         case KVM_MP_STATE_OPERATING:
1830                 kvm_s390_vcpu_start(vcpu);
1831                 break;
1832         case KVM_MP_STATE_LOAD:
1833         case KVM_MP_STATE_CHECK_STOP:
1834                 /* fall through - CHECK_STOP and LOAD are not supported yet */
1835         default:
1836                 rc = -ENXIO;
1837         }
1838
1839         return rc;
1840 }
1841
1842 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1843 {
1844         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1845 }
1846
1847 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1848 {
1849 retry:
1850         kvm_s390_vcpu_request_handled(vcpu);
1851         if (!vcpu->requests)
1852                 return 0;
1853         /*
1854          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1855          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1856          * This ensures that the ipte instruction for this request has
1857          * already finished. We might race against a second unmapper that
1858          * wants to set the blocking bit. Lets just retry the request loop.
1859          */
1860         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1861                 int rc;
1862                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1863                                       kvm_s390_get_prefix(vcpu),
1864                                       PAGE_SIZE * 2);
1865                 if (rc)
1866                         return rc;
1867                 goto retry;
1868         }
1869
1870         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1871                 vcpu->arch.sie_block->ihcpu = 0xffff;
1872                 goto retry;
1873         }
1874
1875         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1876                 if (!ibs_enabled(vcpu)) {
1877                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1878                         atomic_or(CPUSTAT_IBS,
1879                                         &vcpu->arch.sie_block->cpuflags);
1880                 }
1881                 goto retry;
1882         }
1883
1884         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1885                 if (ibs_enabled(vcpu)) {
1886                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1887                         atomic_andnot(CPUSTAT_IBS,
1888                                           &vcpu->arch.sie_block->cpuflags);
1889                 }
1890                 goto retry;
1891         }
1892
1893         /* nothing to do, just clear the request */
1894         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1895
1896         return 0;
1897 }
1898
1899 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
1900 {
1901         struct kvm_vcpu *vcpu;
1902         int i;
1903
1904         mutex_lock(&kvm->lock);
1905         preempt_disable();
1906         kvm->arch.epoch = tod - get_tod_clock();
1907         kvm_s390_vcpu_block_all(kvm);
1908         kvm_for_each_vcpu(i, vcpu, kvm)
1909                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
1910         kvm_s390_vcpu_unblock_all(kvm);
1911         preempt_enable();
1912         mutex_unlock(&kvm->lock);
1913 }
1914
1915 /**
1916  * kvm_arch_fault_in_page - fault-in guest page if necessary
1917  * @vcpu: The corresponding virtual cpu
1918  * @gpa: Guest physical address
1919  * @writable: Whether the page should be writable or not
1920  *
1921  * Make sure that a guest page has been faulted-in on the host.
1922  *
1923  * Return: Zero on success, negative error code otherwise.
1924  */
1925 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1926 {
1927         return gmap_fault(vcpu->arch.gmap, gpa,
1928                           writable ? FAULT_FLAG_WRITE : 0);
1929 }
1930
1931 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1932                                       unsigned long token)
1933 {
1934         struct kvm_s390_interrupt inti;
1935         struct kvm_s390_irq irq;
1936
1937         if (start_token) {
1938                 irq.u.ext.ext_params2 = token;
1939                 irq.type = KVM_S390_INT_PFAULT_INIT;
1940                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1941         } else {
1942                 inti.type = KVM_S390_INT_PFAULT_DONE;
1943                 inti.parm64 = token;
1944                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1945         }
1946 }
1947
1948 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1949                                      struct kvm_async_pf *work)
1950 {
1951         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1952         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1953 }
1954
1955 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1956                                  struct kvm_async_pf *work)
1957 {
1958         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1959         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1960 }
1961
1962 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1963                                struct kvm_async_pf *work)
1964 {
1965         /* s390 will always inject the page directly */
1966 }
1967
1968 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1969 {
1970         /*
1971          * s390 will always inject the page directly,
1972          * but we still want check_async_completion to cleanup
1973          */
1974         return true;
1975 }
1976
1977 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1978 {
1979         hva_t hva;
1980         struct kvm_arch_async_pf arch;
1981         int rc;
1982
1983         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1984                 return 0;
1985         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1986             vcpu->arch.pfault_compare)
1987                 return 0;
1988         if (psw_extint_disabled(vcpu))
1989                 return 0;
1990         if (kvm_s390_vcpu_has_irq(vcpu, 0))
1991                 return 0;
1992         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1993                 return 0;
1994         if (!vcpu->arch.gmap->pfault_enabled)
1995                 return 0;
1996
1997         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1998         hva += current->thread.gmap_addr & ~PAGE_MASK;
1999         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2000                 return 0;
2001
2002         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2003         return rc;
2004 }
2005
2006 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2007 {
2008         int rc, cpuflags;
2009
2010         /*
2011          * On s390 notifications for arriving pages will be delivered directly
2012          * to the guest but the house keeping for completed pfaults is
2013          * handled outside the worker.
2014          */
2015         kvm_check_async_pf_completion(vcpu);
2016
2017         memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2018
2019         if (need_resched())
2020                 schedule();
2021
2022         if (test_cpu_flag(CIF_MCCK_PENDING))
2023                 s390_handle_mcck();
2024
2025         if (!kvm_is_ucontrol(vcpu->kvm)) {
2026                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2027                 if (rc)
2028                         return rc;
2029         }
2030
2031         rc = kvm_s390_handle_requests(vcpu);
2032         if (rc)
2033                 return rc;
2034
2035         if (guestdbg_enabled(vcpu)) {
2036                 kvm_s390_backup_guest_per_regs(vcpu);
2037                 kvm_s390_patch_guest_per_regs(vcpu);
2038         }
2039
2040         vcpu->arch.sie_block->icptcode = 0;
2041         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2042         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2043         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2044
2045         return 0;
2046 }
2047
2048 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2049 {
2050         psw_t *psw = &vcpu->arch.sie_block->gpsw;
2051         u8 opcode;
2052         int rc;
2053
2054         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2055         trace_kvm_s390_sie_fault(vcpu);
2056
2057         /*
2058          * We want to inject an addressing exception, which is defined as a
2059          * suppressing or terminating exception. However, since we came here
2060          * by a DAT access exception, the PSW still points to the faulting
2061          * instruction since DAT exceptions are nullifying. So we've got
2062          * to look up the current opcode to get the length of the instruction
2063          * to be able to forward the PSW.
2064          */
2065         rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2066         if (rc)
2067                 return kvm_s390_inject_prog_cond(vcpu, rc);
2068         psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2069
2070         return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2071 }
2072
2073 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2074 {
2075         int rc = -1;
2076
2077         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2078                    vcpu->arch.sie_block->icptcode);
2079         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2080
2081         if (guestdbg_enabled(vcpu))
2082                 kvm_s390_restore_guest_per_regs(vcpu);
2083
2084         if (exit_reason >= 0) {
2085                 rc = 0;
2086         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2087                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2088                 vcpu->run->s390_ucontrol.trans_exc_code =
2089                                                 current->thread.gmap_addr;
2090                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2091                 rc = -EREMOTE;
2092
2093         } else if (current->thread.gmap_pfault) {
2094                 trace_kvm_s390_major_guest_pfault(vcpu);
2095                 current->thread.gmap_pfault = 0;
2096                 if (kvm_arch_setup_async_pf(vcpu)) {
2097                         rc = 0;
2098                 } else {
2099                         gpa_t gpa = current->thread.gmap_addr;
2100                         rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2101                 }
2102         }
2103
2104         if (rc == -1)
2105                 rc = vcpu_post_run_fault_in_sie(vcpu);
2106
2107         memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2108
2109         if (rc == 0) {
2110                 if (kvm_is_ucontrol(vcpu->kvm))
2111                         /* Don't exit for host interrupts. */
2112                         rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2113                 else
2114                         rc = kvm_handle_sie_intercept(vcpu);
2115         }
2116
2117         return rc;
2118 }
2119
2120 static int __vcpu_run(struct kvm_vcpu *vcpu)
2121 {
2122         int rc, exit_reason;
2123
2124         /*
2125          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2126          * ning the guest), so that memslots (and other stuff) are protected
2127          */
2128         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2129
2130         do {
2131                 rc = vcpu_pre_run(vcpu);
2132                 if (rc)
2133                         break;
2134
2135                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2136                 /*
2137                  * As PF_VCPU will be used in fault handler, between
2138                  * guest_enter and guest_exit should be no uaccess.
2139                  */
2140                 local_irq_disable();
2141                 __kvm_guest_enter();
2142                 local_irq_enable();
2143                 exit_reason = sie64a(vcpu->arch.sie_block,
2144                                      vcpu->run->s.regs.gprs);
2145                 local_irq_disable();
2146                 __kvm_guest_exit();
2147                 local_irq_enable();
2148                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2149
2150                 rc = vcpu_post_run(vcpu, exit_reason);
2151         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2152
2153         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2154         return rc;
2155 }
2156
2157 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2158 {
2159         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2160         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2161         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2162                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2163         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2164                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2165                 /* some control register changes require a tlb flush */
2166                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2167         }
2168         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2169                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2170                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2171                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2172                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2173                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2174         }
2175         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2176                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2177                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2178                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2179                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2180                         kvm_clear_async_pf_completion_queue(vcpu);
2181         }
2182         kvm_run->kvm_dirty_regs = 0;
2183 }
2184
2185 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2186 {
2187         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2188         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2189         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2190         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2191         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2192         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2193         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2194         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2195         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2196         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2197         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2198         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2199 }
2200
2201 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2202 {
2203         int rc;
2204         sigset_t sigsaved;
2205
2206         if (guestdbg_exit_pending(vcpu)) {
2207                 kvm_s390_prepare_debug_exit(vcpu);
2208                 return 0;
2209         }
2210
2211         if (vcpu->sigset_active)
2212                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2213
2214         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2215                 kvm_s390_vcpu_start(vcpu);
2216         } else if (is_vcpu_stopped(vcpu)) {
2217                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2218                                    vcpu->vcpu_id);
2219                 return -EINVAL;
2220         }
2221
2222         sync_regs(vcpu, kvm_run);
2223
2224         might_fault();
2225         rc = __vcpu_run(vcpu);
2226
2227         if (signal_pending(current) && !rc) {
2228                 kvm_run->exit_reason = KVM_EXIT_INTR;
2229                 rc = -EINTR;
2230         }
2231
2232         if (guestdbg_exit_pending(vcpu) && !rc)  {
2233                 kvm_s390_prepare_debug_exit(vcpu);
2234                 rc = 0;
2235         }
2236
2237         if (rc == -EOPNOTSUPP) {
2238                 /* intercept cannot be handled in-kernel, prepare kvm-run */
2239                 kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2240                 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2241                 kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2242                 kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2243                 rc = 0;
2244         }
2245
2246         if (rc == -EREMOTE) {
2247                 /* intercept was handled, but userspace support is needed
2248                  * kvm_run has been prepared by the handler */
2249                 rc = 0;
2250         }
2251
2252         store_regs(vcpu, kvm_run);
2253
2254         if (vcpu->sigset_active)
2255                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2256
2257         vcpu->stat.exit_userspace++;
2258         return rc;
2259 }
2260
2261 /*
2262  * store status at address
2263  * we use have two special cases:
2264  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2265  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2266  */
2267 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2268 {
2269         unsigned char archmode = 1;
2270         unsigned int px;
2271         u64 clkcomp;
2272         int rc;
2273
2274         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2275                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2276                         return -EFAULT;
2277                 gpa = SAVE_AREA_BASE;
2278         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2279                 if (write_guest_real(vcpu, 163, &archmode, 1))
2280                         return -EFAULT;
2281                 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2282         }
2283         rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2284                              vcpu->arch.guest_fpregs.fprs, 128);
2285         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2286                               vcpu->run->s.regs.gprs, 128);
2287         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2288                               &vcpu->arch.sie_block->gpsw, 16);
2289         px = kvm_s390_get_prefix(vcpu);
2290         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2291                               &px, 4);
2292         rc |= write_guest_abs(vcpu,
2293                               gpa + offsetof(struct save_area, fp_ctrl_reg),
2294                               &vcpu->arch.guest_fpregs.fpc, 4);
2295         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2296                               &vcpu->arch.sie_block->todpr, 4);
2297         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2298                               &vcpu->arch.sie_block->cputm, 8);
2299         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2300         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2301                               &clkcomp, 8);
2302         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2303                               &vcpu->run->s.regs.acrs, 64);
2304         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2305                               &vcpu->arch.sie_block->gcr, 128);
2306         return rc ? -EFAULT : 0;
2307 }
2308
2309 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2310 {
2311         /*
2312          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2313          * copying in vcpu load/put. Lets update our copies before we save
2314          * it into the save area
2315          */
2316         save_fpu_regs();
2317         if (test_kvm_facility(vcpu->kvm, 129)) {
2318                 /*
2319                  * If the vector extension is available, the vector registers
2320                  * which overlaps with floating-point registers are saved in
2321                  * the SIE-control block.  Hence, extract the floating-point
2322                  * registers and the FPC value and store them in the
2323                  * guest_fpregs structure.
2324                  */
2325                 WARN_ON(!is_vx_task(current));    /* XXX remove later */
2326                 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2327                 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2328                                  current->thread.fpu.vxrs);
2329         } else
2330                 save_fpu_to(&vcpu->arch.guest_fpregs);
2331         save_access_regs(vcpu->run->s.regs.acrs);
2332
2333         return kvm_s390_store_status_unloaded(vcpu, addr);
2334 }
2335
2336 /*
2337  * store additional status at address
2338  */
2339 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2340                                         unsigned long gpa)
2341 {
2342         /* Only bits 0-53 are used for address formation */
2343         if (!(gpa & ~0x3ff))
2344                 return 0;
2345
2346         return write_guest_abs(vcpu, gpa & ~0x3ff,
2347                                (void *)&vcpu->run->s.regs.vrs, 512);
2348 }
2349
2350 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2351 {
2352         if (!test_kvm_facility(vcpu->kvm, 129))
2353                 return 0;
2354
2355         /*
2356          * The guest VXRS are in the host VXRs due to the lazy
2357          * copying in vcpu load/put. We can simply call save_fpu_regs()
2358          * to save the current register state because we are in the
2359          * middle of a load/put cycle.
2360          *
2361          * Let's update our copies before we save it into the save area.
2362          */
2363         save_fpu_regs();
2364
2365         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2366 }
2367
2368 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2369 {
2370         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2371         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2372 }
2373
2374 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2375 {
2376         unsigned int i;
2377         struct kvm_vcpu *vcpu;
2378
2379         kvm_for_each_vcpu(i, vcpu, kvm) {
2380                 __disable_ibs_on_vcpu(vcpu);
2381         }
2382 }
2383
2384 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2385 {
2386         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2387         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2388 }
2389
2390 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2391 {
2392         int i, online_vcpus, started_vcpus = 0;
2393
2394         if (!is_vcpu_stopped(vcpu))
2395                 return;
2396
2397         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2398         /* Only one cpu at a time may enter/leave the STOPPED state. */
2399         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2400         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2401
2402         for (i = 0; i < online_vcpus; i++) {
2403                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2404                         started_vcpus++;
2405         }
2406
2407         if (started_vcpus == 0) {
2408                 /* we're the only active VCPU -> speed it up */
2409                 __enable_ibs_on_vcpu(vcpu);
2410         } else if (started_vcpus == 1) {
2411                 /*
2412                  * As we are starting a second VCPU, we have to disable
2413                  * the IBS facility on all VCPUs to remove potentially
2414                  * oustanding ENABLE requests.
2415                  */
2416                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2417         }
2418
2419         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2420         /*
2421          * Another VCPU might have used IBS while we were offline.
2422          * Let's play safe and flush the VCPU at startup.
2423          */
2424         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2425         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2426         return;
2427 }
2428
2429 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2430 {
2431         int i, online_vcpus, started_vcpus = 0;
2432         struct kvm_vcpu *started_vcpu = NULL;
2433
2434         if (is_vcpu_stopped(vcpu))
2435                 return;
2436
2437         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2438         /* Only one cpu at a time may enter/leave the STOPPED state. */
2439         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2440         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2441
2442         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2443         kvm_s390_clear_stop_irq(vcpu);
2444
2445         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2446         __disable_ibs_on_vcpu(vcpu);
2447
2448         for (i = 0; i < online_vcpus; i++) {
2449                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2450                         started_vcpus++;
2451                         started_vcpu = vcpu->kvm->vcpus[i];
2452                 }
2453         }
2454
2455         if (started_vcpus == 1) {
2456                 /*
2457                  * As we only have one VCPU left, we want to enable the
2458                  * IBS facility for that VCPU to speed it up.
2459                  */
2460                 __enable_ibs_on_vcpu(started_vcpu);
2461         }
2462
2463         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2464         return;
2465 }
2466
2467 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2468                                      struct kvm_enable_cap *cap)
2469 {
2470         int r;
2471
2472         if (cap->flags)
2473                 return -EINVAL;
2474
2475         switch (cap->cap) {
2476         case KVM_CAP_S390_CSS_SUPPORT:
2477                 if (!vcpu->kvm->arch.css_support) {
2478                         vcpu->kvm->arch.css_support = 1;
2479                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2480                         trace_kvm_s390_enable_css(vcpu->kvm);
2481                 }
2482                 r = 0;
2483                 break;
2484         default:
2485                 r = -EINVAL;
2486                 break;
2487         }
2488         return r;
2489 }
2490
2491 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2492                                   struct kvm_s390_mem_op *mop)
2493 {
2494         void __user *uaddr = (void __user *)mop->buf;
2495         void *tmpbuf = NULL;
2496         int r, srcu_idx;
2497         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2498                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2499
2500         if (mop->flags & ~supported_flags)
2501                 return -EINVAL;
2502
2503         if (mop->size > MEM_OP_MAX_SIZE)
2504                 return -E2BIG;
2505
2506         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2507                 tmpbuf = vmalloc(mop->size);
2508                 if (!tmpbuf)
2509                         return -ENOMEM;
2510         }
2511
2512         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2513
2514         switch (mop->op) {
2515         case KVM_S390_MEMOP_LOGICAL_READ:
2516                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2517                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2518                         break;
2519                 }
2520                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2521                 if (r == 0) {
2522                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2523                                 r = -EFAULT;
2524                 }
2525                 break;
2526         case KVM_S390_MEMOP_LOGICAL_WRITE:
2527                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2528                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2529                         break;
2530                 }
2531                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2532                         r = -EFAULT;
2533                         break;
2534                 }
2535                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2536                 break;
2537         default:
2538                 r = -EINVAL;
2539         }
2540
2541         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2542
2543         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2544                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2545
2546         vfree(tmpbuf);
2547         return r;
2548 }
2549
2550 long kvm_arch_vcpu_ioctl(struct file *filp,
2551                          unsigned int ioctl, unsigned long arg)
2552 {
2553         struct kvm_vcpu *vcpu = filp->private_data;
2554         void __user *argp = (void __user *)arg;
2555         int idx;
2556         long r;
2557
2558         switch (ioctl) {
2559         case KVM_S390_IRQ: {
2560                 struct kvm_s390_irq s390irq;
2561
2562                 r = -EFAULT;
2563                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2564                         break;
2565                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2566                 break;
2567         }
2568         case KVM_S390_INTERRUPT: {
2569                 struct kvm_s390_interrupt s390int;
2570                 struct kvm_s390_irq s390irq;
2571
2572                 r = -EFAULT;
2573                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2574                         break;
2575                 if (s390int_to_s390irq(&s390int, &s390irq))
2576                         return -EINVAL;
2577                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2578                 break;
2579         }
2580         case KVM_S390_STORE_STATUS:
2581                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2582                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2583                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2584                 break;
2585         case KVM_S390_SET_INITIAL_PSW: {
2586                 psw_t psw;
2587
2588                 r = -EFAULT;
2589                 if (copy_from_user(&psw, argp, sizeof(psw)))
2590                         break;
2591                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2592                 break;
2593         }
2594         case KVM_S390_INITIAL_RESET:
2595                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2596                 break;
2597         case KVM_SET_ONE_REG:
2598         case KVM_GET_ONE_REG: {
2599                 struct kvm_one_reg reg;
2600                 r = -EFAULT;
2601                 if (copy_from_user(&reg, argp, sizeof(reg)))
2602                         break;
2603                 if (ioctl == KVM_SET_ONE_REG)
2604                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2605                 else
2606                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2607                 break;
2608         }
2609 #ifdef CONFIG_KVM_S390_UCONTROL
2610         case KVM_S390_UCAS_MAP: {
2611                 struct kvm_s390_ucas_mapping ucasmap;
2612
2613                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2614                         r = -EFAULT;
2615                         break;
2616                 }
2617
2618                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2619                         r = -EINVAL;
2620                         break;
2621                 }
2622
2623                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2624                                      ucasmap.vcpu_addr, ucasmap.length);
2625                 break;
2626         }
2627         case KVM_S390_UCAS_UNMAP: {
2628                 struct kvm_s390_ucas_mapping ucasmap;
2629
2630                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2631                         r = -EFAULT;
2632                         break;
2633                 }
2634
2635                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2636                         r = -EINVAL;
2637                         break;
2638                 }
2639
2640                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2641                         ucasmap.length);
2642                 break;
2643         }
2644 #endif
2645         case KVM_S390_VCPU_FAULT: {
2646                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2647                 break;
2648         }
2649         case KVM_ENABLE_CAP:
2650         {
2651                 struct kvm_enable_cap cap;
2652                 r = -EFAULT;
2653                 if (copy_from_user(&cap, argp, sizeof(cap)))
2654                         break;
2655                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2656                 break;
2657         }
2658         case KVM_S390_MEM_OP: {
2659                 struct kvm_s390_mem_op mem_op;
2660
2661                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2662                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2663                 else
2664                         r = -EFAULT;
2665                 break;
2666         }
2667         case KVM_S390_SET_IRQ_STATE: {
2668                 struct kvm_s390_irq_state irq_state;
2669
2670                 r = -EFAULT;
2671                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2672                         break;
2673                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2674                     irq_state.len == 0 ||
2675                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2676                         r = -EINVAL;
2677                         break;
2678                 }
2679                 r = kvm_s390_set_irq_state(vcpu,
2680                                            (void __user *) irq_state.buf,
2681                                            irq_state.len);
2682                 break;
2683         }
2684         case KVM_S390_GET_IRQ_STATE: {
2685                 struct kvm_s390_irq_state irq_state;
2686
2687                 r = -EFAULT;
2688                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2689                         break;
2690                 if (irq_state.len == 0) {
2691                         r = -EINVAL;
2692                         break;
2693                 }
2694                 r = kvm_s390_get_irq_state(vcpu,
2695                                            (__u8 __user *)  irq_state.buf,
2696                                            irq_state.len);
2697                 break;
2698         }
2699         default:
2700                 r = -ENOTTY;
2701         }
2702         return r;
2703 }
2704
2705 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2706 {
2707 #ifdef CONFIG_KVM_S390_UCONTROL
2708         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2709                  && (kvm_is_ucontrol(vcpu->kvm))) {
2710                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2711                 get_page(vmf->page);
2712                 return 0;
2713         }
2714 #endif
2715         return VM_FAULT_SIGBUS;
2716 }
2717
2718 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2719                             unsigned long npages)
2720 {
2721         return 0;
2722 }
2723
2724 /* Section: memory related */
2725 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2726                                    struct kvm_memory_slot *memslot,
2727                                    const struct kvm_userspace_memory_region *mem,
2728                                    enum kvm_mr_change change)
2729 {
2730         /* A few sanity checks. We can have memory slots which have to be
2731            located/ended at a segment boundary (1MB). The memory in userland is
2732            ok to be fragmented into various different vmas. It is okay to mmap()
2733            and munmap() stuff in this slot after doing this call at any time */
2734
2735         if (mem->userspace_addr & 0xffffful)
2736                 return -EINVAL;
2737
2738         if (mem->memory_size & 0xffffful)
2739                 return -EINVAL;
2740
2741         return 0;
2742 }
2743
2744 void kvm_arch_commit_memory_region(struct kvm *kvm,
2745                                 const struct kvm_userspace_memory_region *mem,
2746                                 const struct kvm_memory_slot *old,
2747                                 const struct kvm_memory_slot *new,
2748                                 enum kvm_mr_change change)
2749 {
2750         int rc;
2751
2752         /* If the basics of the memslot do not change, we do not want
2753          * to update the gmap. Every update causes several unnecessary
2754          * segment translation exceptions. This is usually handled just
2755          * fine by the normal fault handler + gmap, but it will also
2756          * cause faults on the prefix page of running guest CPUs.
2757          */
2758         if (old->userspace_addr == mem->userspace_addr &&
2759             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2760             old->npages * PAGE_SIZE == mem->memory_size)
2761                 return;
2762
2763         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2764                 mem->guest_phys_addr, mem->memory_size);
2765         if (rc)
2766                 pr_warn("failed to commit memory region\n");
2767         return;
2768 }
2769
2770 static int __init kvm_s390_init(void)
2771 {
2772         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2773 }
2774
2775 static void __exit kvm_s390_exit(void)
2776 {
2777         kvm_exit();
2778 }
2779
2780 module_init(kvm_s390_init);
2781 module_exit(kvm_s390_exit);
2782
2783 /*
2784  * Enable autoloading of the kvm module.
2785  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2786  * since x86 takes a different approach.
2787  */
2788 #include <linux/miscdevice.h>
2789 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2790 MODULE_ALIAS("devname:kvm");