Merge tag 'kvm-3.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author    Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 2 Apr 2014 21:50:10 +0000 (14:50 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 2 Apr 2014 21:50:10 +0000 (14:50 -0700)
Pull kvm updates from Paolo Bonzini:
 "PPC and ARM do not have much going on this time.  Most of the cool
  stuff, instead, is in s390 and (after a few releases) x86.

  ARM has some caching fixes and PPC has transactional memory support in
  guests.  MIPS has some fixes, with more probably coming in 3.16 as
  QEMU will soon get support for MIPS KVM.

  For x86 there are optimizations for debug registers, which trigger on
  some Windows games, and other important fixes for Windows guests.  We
  now expose to the guest Broadwell instruction set extensions and also
  Intel MPX.  There's also a fix/workaround for OS X guests, nested
  virtualization features (preemption timer), and a couple of kvmclock
  refinements.

  For s390, the main news is asynchronous page faults, together with
  improvements to IRQs (floating irqs and adapter irqs) that speed up
  virtio devices"

* tag 'kvm-3.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (96 commits)
  KVM: PPC: Book3S HV: Save/restore host PMU registers that are new in POWER8
  KVM: PPC: Book3S HV: Fix decrementer timeouts with non-zero TB offset
  KVM: PPC: Book3S HV: Don't use kvm_memslots() in real mode
  KVM: PPC: Book3S HV: Return ENODEV error rather than EIO
  KVM: PPC: Book3S: Trim top 4 bits of physical address in RTAS code
  KVM: PPC: Book3S HV: Add get/set_one_reg for new TM state
  KVM: PPC: Book3S HV: Add transactional memory support
  KVM: Specify byte order for KVM_EXIT_MMIO
  KVM: vmx: fix MPX detection
  KVM: PPC: Book3S HV: Fix KVM hang with CONFIG_KVM_XICS=n
  KVM: PPC: Book3S: Introduce hypervisor call H_GET_TCE
  KVM: PPC: Book3S HV: Fix incorrect userspace exit on ioeventfd write
  KVM: s390: clear local interrupts at cpu initial reset
  KVM: s390: Fix possible memory leak in SIGP functions
  KVM: s390: fix calculation of idle_mask array size
  KVM: s390: randomize sca address
  KVM: ioapic: reinject pending interrupts on KVM_SET_IRQCHIP
  KVM: Bump KVM_MAX_IRQ_ROUTES for s390
  KVM: s390: irq routing for adapter interrupts.
  KVM: s390: adapter interrupt sources
  ...
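
The MPX exposure mentioned in the x86 paragraph (and the "KVM: vmx: fix MPX detection" commit above) is consumed by userspace through the long-standing KVM_GET_SUPPORTED_CPUID ioctl.  A minimal sketch follows, assuming only the standard uapi headers; the helper name and the fixed entry count are invented for illustration, and a real VMM would retry on E2BIG.

/*
 * Hypothetical helper: query KVM_GET_SUPPORTED_CPUID on the /dev/kvm fd and
 * report whether MPX (CPUID.(EAX=7,ECX=0):EBX bit 14) can be exposed to the
 * guest.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdlib.h>
#include <stdbool.h>

static bool kvm_supports_mpx(int kvm_fd)
{
	int nent = 100;		/* arbitrary; retry on E2BIG in real code */
	struct kvm_cpuid2 *cpuid;
	bool mpx = false;
	int i;

	cpuid = calloc(1, sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2));
	if (!cpuid)
		return false;
	cpuid->nent = nent;

	if (ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid) < 0) {
		free(cpuid);
		return false;
	}

	for (i = 0; i < cpuid->nent; i++) {
		/* Leaf 7, subleaf 0: structured extended features. */
		if (cpuid->entries[i].function == 7 && cpuid->entries[i].index == 0)
			mpx = cpuid->entries[i].ebx & (1u << 14);
	}

	free(cpuid);
	return mpx;
}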

15 files changed:
arch/arm64/include/asm/kvm_arm.h
arch/powerpc/include/asm/reg.h
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/pgtable.h
arch/s390/kernel/irq.c
arch/s390/kvm/diag.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/x86/include/asm/xsave.h
arch/x86/include/uapi/asm/msr-index.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/svm.c
virt/kvm/kvm_main.c

diff --combined arch/arm64/include/asm/kvm_arm.h
index 21ef48d32ff271fbdccba7f2df710b021f9db7e0,00fbaa75dc7bcf55541c7bf00af609dfb63529a9..3d6903006a8aacf1b6b561db1d426958844665ef
@@@ -62,6 -62,7 +62,7 @@@
   * RW:                64bit by default, can be overriden for 32bit VMs
   * TAC:               Trap ACTLR
   * TSC:               Trap SMC
+  * TVM:               Trap VM ops (until M+C set in SCTLR_EL1)
   * TSW:               Trap cache operations by set/way
   * TWE:               Trap WFE
   * TWI:               Trap WFI
@@@ -74,7 -75,7 +75,7 @@@
   * SWIO:      Turn set/way invalidates into set/way clean+invalidate
   */
  #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
-                        HCR_BSU_IS | HCR_FB | HCR_TAC | \
+                        HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
                         HCR_AMO | HCR_IMO | HCR_FMO | \
                         HCR_SWIO | HCR_TIDCP | HCR_RW)
  #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
  
  /* VTCR_EL2 Registers bits */
  #define VTCR_EL2_PS_MASK      (7 << 16)
 -#define VTCR_EL2_PS_40B               (2 << 16)
  #define VTCR_EL2_TG0_MASK     (1 << 14)
  #define VTCR_EL2_TG0_4K               (0 << 14)
  #define VTCR_EL2_TG0_64K      (1 << 14)
   * 64kB pages (TG0 = 1)
   * 2 level page tables (SL = 1)
   */
 -#define VTCR_EL2_FLAGS                (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
 -                               VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
 -                               VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
 -                               VTCR_EL2_T0SZ_40B)
 +#define VTCR_EL2_FLAGS                (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
 +                               VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
 +                               VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
  #define VTTBR_X               (38 - VTCR_EL2_T0SZ_40B)
  #else
  /*
   * 4kB pages (TG0 = 0)
   * 3 level page tables (SL = 1)
   */
 -#define VTCR_EL2_FLAGS                (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
 -                               VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
 -                               VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
 -                               VTCR_EL2_T0SZ_40B)
 +#define VTCR_EL2_FLAGS                (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
 +                               VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
 +                               VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
  #define VTTBR_X               (37 - VTCR_EL2_T0SZ_40B)
  #endif
  
diff --combined arch/powerpc/include/asm/reg.h
index 1a36b8ede41736f91b305eeb3b1f3c1ac225abcd,ce17815b8b55a95dd38439bfea2bf95f8d1c98bc..0dcc48af25a302759a5c0aca3f46b5abd7fb2507
  #define SPRN_ACOP     0x1F    /* Available Coprocessor Register */
  #define SPRN_TFIAR    0x81    /* Transaction Failure Inst Addr   */
  #define SPRN_TEXASR   0x82    /* Transaction EXception & Summary */
+ #define   TEXASR_FS   __MASK(63-36)   /* Transaction Failure Summary */
  #define SPRN_TEXASRU  0x83    /* ''      ''      ''    Upper 32  */
  #define SPRN_TFHAR    0x80    /* Transaction Failure Handler Addr */
  #define SPRN_CTRLF    0x088
  #define SPRN_SPRG3    0x113   /* Special Purpose Register General 3 */
  #define SPRN_USPRG3   0x103   /* SPRG3 userspace read */
  #define SPRN_SPRG4    0x114   /* Special Purpose Register General 4 */
 +#define SPRN_USPRG4   0x104   /* SPRG4 userspace read */
  #define SPRN_SPRG5    0x115   /* Special Purpose Register General 5 */
 +#define SPRN_USPRG5   0x105   /* SPRG5 userspace read */
  #define SPRN_SPRG6    0x116   /* Special Purpose Register General 6 */
 +#define SPRN_USPRG6   0x106   /* SPRG6 userspace read */
  #define SPRN_SPRG7    0x117   /* Special Purpose Register General 7 */
 +#define SPRN_USPRG7   0x107   /* SPRG7 userspace read */
  #define SPRN_SRR0     0x01A   /* Save/Restore Register 0 */
  #define SPRN_SRR1     0x01B   /* Save/Restore Register 1 */
  #define   SRR1_ISI_NOPT               0x40000000 /* ISI: Not found in hash */
  #define   MMCR0_PMXE  0x04000000UL /* performance monitor exception enable */
  #define   MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */
  #define   MMCR0_TBEE  0x00400000UL /* time base exception enable */
 +#define   MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */
  #define   MMCR0_EBE   0x00100000UL /* Event based branch enable */
  #define   MMCR0_PMCC  0x000c0000UL /* PMC control */
  #define   MMCR0_PMCC_U6       0x00080000UL /* PMC1-6 are R/W by user (PR) */
  #define   MMCR0_PMC1CE        0x00008000UL /* PMC1 count enable*/
  #define   MMCR0_PMCjCE        0x00004000UL /* PMCj count enable*/
  #define   MMCR0_TRIGGER       0x00002000UL /* TRIGGER enable */
 +#define   MMCR0_PMAO_SYNC 0x00000800UL /* PMU interrupt is synchronous */
  #define   MMCR0_PMAO  0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */
  #define   MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */
  #define   MMCR0_FC56  0x00000010UL /* freeze counters 5 and 6 */
  #define SPRN_EBBHR    804     /* Event based branch handler register */
  #define SPRN_EBBRR    805     /* Event based branch return register */
  #define SPRN_BESCR    806     /* Branch event status and control register */
 +#define   BESCR_GE    0x8000000000000000ULL /* Global Enable */
  #define SPRN_WORT     895     /* Workload optimization register - thread */
  
  #define SPRN_PMC1     787
   * 64-bit embedded
   *    - SPRG0 generic exception scratch
   *    - SPRG2 TLB exception stack
 - *    - SPRG3 critical exception scratch and
 - *        CPU and NUMA node for VDSO getcpu (user visible)
 + *    - SPRG3 critical exception scratch (user visible, sorry!)
   *    - SPRG4 unused (user visible)
   *    - SPRG6 TLB miss scratch (user visible, sorry !)
 - *    - SPRG7 critical exception scratch
 + *    - SPRG7 CPU and NUMA node for VDSO getcpu (user visible)
   *    - SPRG8 machine check exception scratch
   *    - SPRG9 debug exception scratch
   *
  #define SPRN_SPRG_SCRATCH0    SPRN_SPRG2
  #define SPRN_SPRG_HPACA               SPRN_HSPRG0
  #define SPRN_SPRG_HSCRATCH0   SPRN_HSPRG1
 +#define SPRN_SPRG_VDSO_READ   SPRN_USPRG3
 +#define SPRN_SPRG_VDSO_WRITE  SPRN_SPRG3
  
  #define GET_PACA(rX)                                  \
        BEGIN_FTR_SECTION_NESTED(66);                   \
  #define SPRN_SPRG_TLB_SCRATCH SPRN_SPRG6
  #define SPRN_SPRG_GEN_SCRATCH SPRN_SPRG0
  #define SPRN_SPRG_GDBELL_SCRATCH SPRN_SPRG_GEN_SCRATCH
 +#define SPRN_SPRG_VDSO_READ   SPRN_USPRG7
 +#define SPRN_SPRG_VDSO_WRITE  SPRN_SPRG7
  
  #define SET_PACA(rX)  mtspr   SPRN_SPRG_PACA,rX
  #define GET_PACA(rX)  mfspr   rX,SPRN_SPRG_PACA
  #define PVR_8560      0x80200000
  #define PVR_VER_E500V1        0x8020
  #define PVR_VER_E500V2        0x8021
 +#define PVR_VER_E500MC        0x8023
 +#define PVR_VER_E5500 0x8024
  #define PVR_VER_E6500 0x8040
  
  /*
diff --combined arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 53d647f8e7413bd9117d860b74b503fff8b136c7,4963335198fa94524266ba977cf113de274b97dd..ffbb871c2bd803827fa5a78658f29d2fa8a1dbd6
@@@ -28,6 -28,9 +28,9 @@@
  #include <asm/exception-64s.h>
  #include <asm/kvm_book3s_asm.h>
  #include <asm/mmu-hash64.h>
+ #include <asm/tm.h>
+ #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
  
  #ifdef __LITTLE_ENDIAN__
  #error Need to fix lppaca and SLB shadow accesses in little endian mode
@@@ -75,8 -78,8 +78,8 @@@ BEGIN_FTR_SECTIO
  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
  
        /* Restore SPRG3 */
 -      ld      r3,PACA_SPRG3(r13)
 -      mtspr   SPRN_SPRG3,r3
 +      ld      r3,PACA_SPRG_VDSO(r13)
 +      mtspr   SPRN_SPRG_VDSO_WRITE,r3
  
        /* Reload the host's PMU registers */
        ld      r3, PACALPPACAPTR(r13)  /* is the host using the PMU? */
@@@ -106,8 -109,18 +109,18 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201
        ld      r3, HSTATE_MMCR(r13)
        ld      r4, HSTATE_MMCR + 8(r13)
        ld      r5, HSTATE_MMCR + 16(r13)
+       ld      r6, HSTATE_MMCR + 24(r13)
+       ld      r7, HSTATE_MMCR + 32(r13)
        mtspr   SPRN_MMCR1, r4
        mtspr   SPRN_MMCRA, r5
+       mtspr   SPRN_SIAR, r6
+       mtspr   SPRN_SDAR, r7
+ BEGIN_FTR_SECTION
+       ld      r8, HSTATE_MMCR + 40(r13)
+       ld      r9, HSTATE_MMCR + 48(r13)
+       mtspr   SPRN_MMCR2, r8
+       mtspr   SPRN_SIER, r9
+ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mtspr   SPRN_MMCR0, r3
        isync
  23:
@@@ -597,6 -610,116 +610,116 @@@ BEGIN_FTR_SECTIO
   END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
  
+ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ BEGIN_FTR_SECTION
+       b       skip_tm
+ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+       /* Turn on TM/FP/VSX/VMX so we can restore them. */
+       mfmsr   r5
+       li      r6, MSR_TM >> 32
+       sldi    r6, r6, 32
+       or      r5, r5, r6
+       ori     r5, r5, MSR_FP
+       oris    r5, r5, (MSR_VEC | MSR_VSX)@h
+       mtmsrd  r5
+       /*
+        * The user may change these outside of a transaction, so they must
+        * always be context switched.
+        */
+       ld      r5, VCPU_TFHAR(r4)
+       ld      r6, VCPU_TFIAR(r4)
+       ld      r7, VCPU_TEXASR(r4)
+       mtspr   SPRN_TFHAR, r5
+       mtspr   SPRN_TFIAR, r6
+       mtspr   SPRN_TEXASR, r7
+       ld      r5, VCPU_MSR(r4)
+       rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+       beq     skip_tm /* TM not active in guest */
+       /* Make sure the failure summary is set, otherwise we'll program check
+        * when we trechkpt.  It's possible that this might have been not set
+        * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+        * host.
+        */
+       oris    r7, r7, (TEXASR_FS)@h
+       mtspr   SPRN_TEXASR, r7
+       /*
+        * We need to load up the checkpointed state for the guest.
+        * We need to do this early as it will blow away any GPRs, VSRs and
+        * some SPRs.
+        */
+       mr      r31, r4
+       addi    r3, r31, VCPU_FPRS_TM
+       bl      .load_fp_state
+       addi    r3, r31, VCPU_VRS_TM
+       bl      .load_vr_state
+       mr      r4, r31
+       lwz     r7, VCPU_VRSAVE_TM(r4)
+       mtspr   SPRN_VRSAVE, r7
+       ld      r5, VCPU_LR_TM(r4)
+       lwz     r6, VCPU_CR_TM(r4)
+       ld      r7, VCPU_CTR_TM(r4)
+       ld      r8, VCPU_AMR_TM(r4)
+       ld      r9, VCPU_TAR_TM(r4)
+       mtlr    r5
+       mtcr    r6
+       mtctr   r7
+       mtspr   SPRN_AMR, r8
+       mtspr   SPRN_TAR, r9
+       /*
+        * Load up PPR and DSCR values but don't put them in the actual SPRs
+        * till the last moment to avoid running with userspace PPR and DSCR for
+        * too long.
+        */
+       ld      r29, VCPU_DSCR_TM(r4)
+       ld      r30, VCPU_PPR_TM(r4)
+       std     r2, PACATMSCRATCH(r13) /* Save TOC */
+       /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+       li      r5, 0
+       mtmsrd  r5, 1
+       /* Load GPRs r0-r28 */
+       reg = 0
+       .rept   29
+       ld      reg, VCPU_GPRS_TM(reg)(r31)
+       reg = reg + 1
+       .endr
+       mtspr   SPRN_DSCR, r29
+       mtspr   SPRN_PPR, r30
+       /* Load final GPRs */
+       ld      29, VCPU_GPRS_TM(29)(r31)
+       ld      30, VCPU_GPRS_TM(30)(r31)
+       ld      31, VCPU_GPRS_TM(31)(r31)
+       /* TM checkpointed state is now setup.  All GPRs are now volatile. */
+       TRECHKPT
+       /* Now let's get back the state we need. */
+       HMT_MEDIUM
+       GET_PACA(r13)
+       ld      r29, HSTATE_DSCR(r13)
+       mtspr   SPRN_DSCR, r29
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       ld      r1, HSTATE_HOST_R1(r13)
+       ld      r2, PACATMSCRATCH(r13)
+       /* Set the MSR RI since we have our registers back. */
+       li      r5, MSR_RI
+       mtmsrd  r5, 1
+ skip_tm:
+ #endif
        /* Load guest PMU registers */
        /* R4 is live here (vcpu pointer) */
        li      r3, 1
@@@ -704,14 -827,6 +827,6 @@@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S
        ld      r6, VCPU_VTB(r4)
        mtspr   SPRN_IC, r5
        mtspr   SPRN_VTB, r6
- #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-       ld      r5, VCPU_TFHAR(r4)
-       ld      r6, VCPU_TFIAR(r4)
-       ld      r7, VCPU_TEXASR(r4)
-       mtspr   SPRN_TFHAR, r5
-       mtspr   SPRN_TFIAR, r6
-       mtspr   SPRN_TEXASR, r7
- #endif
        ld      r8, VCPU_EBBHR(r4)
        mtspr   SPRN_EBBHR, r8
        ld      r5, VCPU_EBBRR(r4)
         * Set the decrementer to the guest decrementer.
         */
        ld      r8,VCPU_DEC_EXPIRES(r4)
+       /* r8 is a host timebase value here, convert to guest TB */
+       ld      r5,HSTATE_KVM_VCORE(r13)
+       ld      r6,VCORE_TB_OFFSET(r5)
+       add     r8,r8,r6
        mftb    r7
        subf    r3,r7,r8
        mtspr   SPRN_DEC,r3
@@@ -817,7 -936,8 +936,8 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206
  12:   mtspr   SPRN_SRR0, r10
        mr      r10,r0
        mtspr   SPRN_SRR1, r11
-       ld      r11, VCPU_INTR_MSR(r4)
+       mr      r9, r4
+       bl      kvmppc_msr_interrupt
  5:
  
  /*
@@@ -1098,17 -1218,15 +1218,15 @@@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201
        mftb    r6
        extsw   r5,r5
        add     r5,r5,r6
+       /* r5 is a guest timebase value here, convert to host TB */
+       ld      r3,HSTATE_KVM_VCORE(r13)
+       ld      r4,VCORE_TB_OFFSET(r3)
+       subf    r5,r4,r5
        std     r5,VCPU_DEC_EXPIRES(r9)
  
  BEGIN_FTR_SECTION
        b       8f
  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
-       /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
-       mfmsr   r8
-       li      r0, 1
-       rldimi  r8, r0, MSR_TM_LG, 63-MSR_TM_LG
-       mtmsrd  r8
        /* Save POWER8-specific registers */
        mfspr   r5, SPRN_IAMR
        mfspr   r6, SPRN_PSPB
        std     r5, VCPU_IC(r9)
        std     r6, VCPU_VTB(r9)
        std     r7, VCPU_TAR(r9)
- #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-       mfspr   r5, SPRN_TFHAR
-       mfspr   r6, SPRN_TFIAR
-       mfspr   r7, SPRN_TEXASR
-       std     r5, VCPU_TFHAR(r9)
-       std     r6, VCPU_TFIAR(r9)
-       std     r7, VCPU_TEXASR(r9)
- #endif
        mfspr   r8, SPRN_EBBHR
        std     r8, VCPU_EBBHR(r9)
        mfspr   r5, SPRN_EBBRR
@@@ -1387,7 -1497,7 +1497,7 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S
        ld      r8,VCORE_TB_OFFSET(r5)
        cmpdi   r8,0
        beq     17f
-       mftb    r6                      /* current host timebase */
+       mftb    r6                      /* current guest timebase */
        subf    r8,r8,r6
        mtspr   SPRN_TBU40,r8           /* update upper 40 bits */
        mftb    r7                      /* check if lower 24 bits overflowed */
@@@ -1557,7 -1667,7 +1667,7 @@@ kvmppc_hdsi
        mtspr   SPRN_SRR0, r10
        mtspr   SPRN_SRR1, r11
        li      r10, BOOK3S_INTERRUPT_DATA_STORAGE
-       ld      r11, VCPU_INTR_MSR(r9)
+       bl      kvmppc_msr_interrupt
  fast_interrupt_c_return:
  6:    ld      r7, VCPU_CTR(r9)
        lwz     r8, VCPU_XER(r9)
@@@ -1626,7 -1736,7 +1736,7 @@@ kvmppc_hisi
  1:    mtspr   SPRN_SRR0, r10
        mtspr   SPRN_SRR1, r11
        li      r10, BOOK3S_INTERRUPT_INST_STORAGE
-       ld      r11, VCPU_INTR_MSR(r9)
+       bl      kvmppc_msr_interrupt
        b       fast_interrupt_c_return
  
  3:    ld      r6, VCPU_KVM(r9)        /* not relocated, use VRMA */
@@@ -1669,7 -1779,7 +1779,7 @@@ sc_1_fast_return
        mtspr   SPRN_SRR0,r10
        mtspr   SPRN_SRR1,r11
        li      r10, BOOK3S_INTERRUPT_SYSCALL
-       ld      r11, VCPU_INTR_MSR(r9)
+       bl      kvmppc_msr_interrupt
        mr      r4,r9
        b       fast_guest_return
  
@@@ -1691,7 -1801,7 +1801,7 @@@ hcall_real_table
        .long   0               /* 0x10 - H_CLEAR_MOD */
        .long   0               /* 0x14 - H_CLEAR_REF */
        .long   .kvmppc_h_protect - hcall_real_table
-       .long   0               /* 0x1c - H_GET_TCE */
+       .long   .kvmppc_h_get_tce - hcall_real_table
        .long   .kvmppc_h_put_tce - hcall_real_table
        .long   0               /* 0x24 - H_SET_SPRG0 */
        .long   .kvmppc_h_set_dabr - hcall_real_table
@@@ -1997,7 -2107,7 +2107,7 @@@ machine_check_realmode
        beq     mc_cont
        /* If not, deliver a machine check.  SRR0/1 are already set */
        li      r10, BOOK3S_INTERRUPT_MACHINE_CHECK
-       ld      r11, VCPU_INTR_MSR(r9)
+       bl      kvmppc_msr_interrupt
        b       fast_interrupt_c_return
  
  /*
@@@ -2138,8 -2248,6 +2248,6 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC
        mfspr   r6,SPRN_VRSAVE
        stw     r6,VCPU_VRSAVE(r31)
        mtlr    r30
-       mtmsrd  r5
-       isync
        blr
  
  /*
@@@ -2186,3 -2294,20 +2294,20 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC
   */
  kvmppc_bad_host_intr:
        b       .
+ /*
+  * This mimics the MSR transition on IRQ delivery.  The new guest MSR is taken
+  * from VCPU_INTR_MSR and is modified based on the required TM state changes.
+  *   r11 has the guest MSR value (in/out)
+  *   r9 has a vcpu pointer (in)
+  *   r0 is used as a scratch register
+  */
+ kvmppc_msr_interrupt:
+       rldicl  r0, r11, 64 - MSR_TS_S_LG, 62
+       cmpwi   r0, 2 /* Check if we are in transactional state..  */
+       ld      r11, VCPU_INTR_MSR(r9)
+       bne     1f
+       /* ... if transactional, change to suspended */
+       li      r0, 1
+ 1:    rldimi  r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+       blr
diff --combined arch/s390/include/asm/kvm_host.h
index 9bf95bb30f1a6cf27d0494396168f87e23cf8a65,68897fc6595081af3acd0cb9007fef661c676563..154b60089be996de483f07844f9229c728918892
  #include <linux/hrtimer.h>
  #include <linux/interrupt.h>
  #include <linux/kvm_host.h>
+ #include <linux/kvm.h>
  #include <asm/debug.h>
  #include <asm/cpu.h>
+ #include <asm/isc.h>
  
  #define KVM_MAX_VCPUS 64
  #define KVM_USER_MEM_SLOTS 32
  
+ /*
+  * These seem to be used for allocating ->chip in the routing table,
+  * which we don't use. 4096 is an out-of-thin-air value. If we need
+  * to look at ->chip later on, we'll need to revisit this.
+  */
+ #define KVM_NR_IRQCHIPS 1
+ #define KVM_IRQCHIP_NUM_PINS 4096
  struct sca_entry {
        atomic_t scn;
        __u32   reserved;
@@@ -106,9 -116,9 +116,11 @@@ struct kvm_s390_sie_block 
        __u64   gbea;                   /* 0x0180 */
        __u8    reserved188[24];        /* 0x0188 */
        __u32   fac;                    /* 0x01a0 */
 -      __u8    reserved1a4[58];        /* 0x01a4 */
 +      __u8    reserved1a4[20];        /* 0x01a4 */
 +      __u64   cbrlo;                  /* 0x01b8 */
-       __u8    reserved1c0[40];        /* 0x01c0 */
++      __u8    reserved1c0[30];        /* 0x01c0 */
+       __u64   pp;                     /* 0x01de */
+       __u8    reserved1e6[2];         /* 0x01e6 */
        __u64   itdba;                  /* 0x01e8 */
        __u8    reserved1f0[16];        /* 0x01f0 */
  } __attribute__((packed));
@@@ -157,7 -167,6 +169,7 @@@ struct kvm_vcpu_stat 
        u32 instruction_stsi;
        u32 instruction_stfl;
        u32 instruction_tprot;
 +      u32 instruction_essa;
        u32 instruction_sigp_sense;
        u32 instruction_sigp_sense_running;
        u32 instruction_sigp_external_call;
        u32 diagnose_9c;
  };
  
- struct kvm_s390_io_info {
-       __u16        subchannel_id;            /* 0x0b8 */
-       __u16        subchannel_nr;            /* 0x0ba */
-       __u32        io_int_parm;              /* 0x0bc */
-       __u32        io_int_word;              /* 0x0c0 */
- };
- struct kvm_s390_ext_info {
-       __u32 ext_params;
-       __u64 ext_params2;
- };
  #define PGM_OPERATION            0x01
  #define PGM_PRIVILEGED_OP      0x02
  #define PGM_EXECUTE              0x03
  #define PGM_SPECIFICATION        0x06
  #define PGM_DATA                 0x07
  
- struct kvm_s390_pgm_info {
-       __u16 code;
- };
- struct kvm_s390_prefix_info {
-       __u32 address;
- };
- struct kvm_s390_extcall_info {
-       __u16 code;
- };
- struct kvm_s390_emerg_info {
-       __u16 code;
- };
- struct kvm_s390_mchk_info {
-       __u64 cr14;
-       __u64 mcic;
- };
  struct kvm_s390_interrupt_info {
        struct list_head list;
        u64     type;
@@@ -246,9 -222,8 +225,8 @@@ struct kvm_s390_float_interrupt 
        struct list_head list;
        atomic_t active;
        int next_rr_cpu;
-       unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
-                               / sizeof(long)];
-       struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
+       unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+       unsigned int irq_count;
  };
  
  
@@@ -265,6 -240,10 +243,10 @@@ struct kvm_vcpu_arch 
                u64             stidp_data;
        };
        struct gmap *gmap;
+ #define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
+       unsigned long pfault_token;
+       unsigned long pfault_select;
+       unsigned long pfault_compare;
  };
  
  struct kvm_vm_stat {
  struct kvm_arch_memory_slot {
  };
  
+ struct s390_map_info {
+       struct list_head list;
+       __u64 guest_addr;
+       __u64 addr;
+       struct page *page;
+ };
+ struct s390_io_adapter {
+       unsigned int id;
+       int isc;
+       bool maskable;
+       bool masked;
+       bool swap;
+       struct rw_semaphore maps_lock;
+       struct list_head maps;
+       atomic_t nr_maps;
+ };
+ #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+ #define MAX_S390_ADAPTER_MAPS 256
  struct kvm_arch{
        struct sca_block *sca;
        debug_info_t *dbf;
        struct kvm_s390_float_interrupt float_int;
+       struct kvm_device *flic;
        struct gmap *gmap;
        int css_support;
+       int use_irqchip;
+       struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
  };
  
  #define KVM_HVA_ERR_BAD               (-1UL)
@@@ -290,6 -293,24 +296,24 @@@ static inline bool kvm_is_error_hva(uns
        return IS_ERR_VALUE(addr);
  }
  
+ #define ASYNC_PF_PER_VCPU     64
+ struct kvm_vcpu;
+ struct kvm_async_pf;
+ struct kvm_arch_async_pf {
+       unsigned long pfault_token;
+ };
+ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+                              struct kvm_async_pf *work);
+ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+                                    struct kvm_async_pf *work);
+ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+                                struct kvm_async_pf *work);
  extern int sie64a(struct kvm_s390_sie_block *, u64 *);
  extern char sie_exit;
  #endif
diff --combined arch/s390/include/asm/pgtable.h
index 1ab75eaacbd417079d3e6de25e8401c87e24a1d7,66101f6c6d819354d2e04ef33a275cec253c2edd..50a75d96f9394faeb60a4ca5d8ca0f1411d754c0
@@@ -229,7 -229,6 +229,7 @@@ extern unsigned long MODULES_END
  #define _PAGE_READ    0x010           /* SW pte read bit */
  #define _PAGE_WRITE   0x020           /* SW pte write bit */
  #define _PAGE_SPECIAL 0x040           /* SW associated with special page */
 +#define _PAGE_UNUSED  0x080           /* SW bit for pgste usage state */
  #define __HAVE_ARCH_PTE_SPECIAL
  
  /* Set of bits not changed in pte_modify */
  
  #endif /* CONFIG_64BIT */
  
 +/* Guest Page State used for virtualization */
 +#define _PGSTE_GPS_ZERO               0x0000000080000000UL
 +#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
 +#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
 +#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
 +
  /*
   * A user page table pointer has the space-switch-event bit, the
   * private-space-control bit and the storage-alteration-event-control
@@@ -624,14 -617,6 +624,14 @@@ static inline int pte_none(pte_t pte
        return pte_val(pte) == _PAGE_INVALID;
  }
  
 +static inline int pte_swap(pte_t pte)
 +{
 +      /* Bit pattern: (pte & 0x603) == 0x402 */
 +      return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT |
 +                              _PAGE_TYPE | _PAGE_PRESENT))
 +              == (_PAGE_INVALID | _PAGE_TYPE);
 +}
 +
  static inline int pte_file(pte_t pte)
  {
        /* Bit pattern: (pte & 0x601) == 0x600 */
@@@ -782,6 -767,7 +782,7 @@@ static inline void pgste_set_pte(pte_t 
   * @table: pointer to the page directory
   * @asce: address space control element for gmap page table
   * @crst_list: list of all crst tables used in the guest address space
+  * @pfault_enabled: defines if pfaults are applicable for the guest
   */
  struct gmap {
        struct list_head list;
        unsigned long asce;
        void *private;
        struct list_head crst_list;
+       bool pfault_enabled;
  };
  
  /**
@@@ -836,20 -823,20 +838,20 @@@ unsigned long gmap_translate(unsigned l
  unsigned long __gmap_fault(unsigned long address, struct gmap *);
  unsigned long gmap_fault(unsigned long address, struct gmap *);
  void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
 +void __gmap_zap(unsigned long address, struct gmap *);
  
  void gmap_register_ipte_notifier(struct gmap_notifier *);
  void gmap_unregister_ipte_notifier(struct gmap_notifier *);
  int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
 -void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
 +void gmap_do_ipte_notify(struct mm_struct *, pte_t *);
  
  static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
 -                                      unsigned long addr,
                                        pte_t *ptep, pgste_t pgste)
  {
  #ifdef CONFIG_PGSTE
        if (pgste_val(pgste) & PGSTE_IN_BIT) {
                pgste_val(pgste) &= ~PGSTE_IN_BIT;
 -              gmap_do_ipte_notify(mm, addr, ptep);
 +              gmap_do_ipte_notify(mm, ptep);
        }
  #endif
        return pgste;
@@@ -867,7 -854,6 +869,7 @@@ static inline void set_pte_at(struct mm
  
        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
 +              pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
                pgste_set_key(ptep, pgste, entry);
                pgste_set_pte(ptep, entry);
                pgste_set_unlock(ptep, pgste);
@@@ -897,12 -883,6 +899,12 @@@ static inline int pte_young(pte_t pte
        return (pte_val(pte) & _PAGE_YOUNG) != 0;
  }
  
 +#define __HAVE_ARCH_PTE_UNUSED
 +static inline int pte_unused(pte_t pte)
 +{
 +      return pte_val(pte) & _PAGE_UNUSED;
 +}
 +
  /*
   * pgd/pmd/pte modification functions
   */
@@@ -1056,41 -1036,30 +1058,41 @@@ static inline int ptep_test_and_clear_u
  
  static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
  {
 -      if (!(pte_val(*ptep) & _PAGE_INVALID)) {
 +      unsigned long pto = (unsigned long) ptep;
 +
  #ifndef CONFIG_64BIT
 -              /* pto must point to the start of the segment table */
 -              pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
 -#else
 -              /* ipte in zarch mode can do the math */
 -              pte_t *pto = ptep;
 +      /* pto in ESA mode must point to the start of the segment table */
 +      pto &= 0x7ffffc00;
  #endif
 -              asm volatile(
 -                      "       ipte    %2,%3"
 -                      : "=m" (*ptep) : "m" (*ptep),
 -                        "a" (pto), "a" (address));
 -      }
 +      /* Invalidation + global TLB flush for the pte */
 +      asm volatile(
 +              "       ipte    %2,%3"
 +              : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
 +}
 +
 +static inline void ptep_flush_direct(struct mm_struct *mm,
 +                                   unsigned long address, pte_t *ptep)
 +{
 +      if (pte_val(*ptep) & _PAGE_INVALID)
 +              return;
 +      __ptep_ipte(address, ptep);
  }
  
  static inline void ptep_flush_lazy(struct mm_struct *mm,
                                   unsigned long address, pte_t *ptep)
  {
 -      int active = (mm == current->active_mm) ? 1 : 0;
 +      int active, count;
  
 -      if (atomic_read(&mm->context.attach_count) > active)
 -              __ptep_ipte(address, ptep);
 -      else
 +      if (pte_val(*ptep) & _PAGE_INVALID)
 +              return;
 +      active = (mm == current->active_mm) ? 1 : 0;
 +      count = atomic_add_return(0x10000, &mm->context.attach_count);
 +      if ((count & 0xffff) <= active) {
 +              pte_val(*ptep) |= _PAGE_INVALID;
                mm->context.flush_mm = 1;
 +      } else
 +              __ptep_ipte(address, ptep);
 +      atomic_sub(0x10000, &mm->context.attach_count);
  }
  
  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
@@@ -1103,11 -1072,11 +1105,11 @@@ static inline int ptep_test_and_clear_y
  
        if (mm_has_pgste(vma->vm_mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
 +              pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
        }
  
        pte = *ptep;
 -      __ptep_ipte(addr, ptep);
 +      ptep_flush_direct(vma->vm_mm, addr, ptep);
        young = pte_young(pte);
        pte = pte_mkold(pte);
  
@@@ -1149,7 -1118,7 +1151,7 @@@ static inline pte_t ptep_get_and_clear(
  
        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 +              pgste = pgste_ipte_notify(mm, ptep, pgste);
        }
  
        pte = *ptep;
@@@ -1173,11 -1142,12 +1175,11 @@@ static inline pte_t ptep_modify_prot_st
  
        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste_ipte_notify(mm, address, ptep, pgste);
 +              pgste_ipte_notify(mm, ptep, pgste);
        }
  
        pte = *ptep;
        ptep_flush_lazy(mm, address, ptep);
 -      pte_val(*ptep) |= _PAGE_INVALID;
  
        if (mm_has_pgste(mm)) {
                pgste = pgste_update_all(&pte, pgste);
@@@ -1210,17 -1180,14 +1212,17 @@@ static inline pte_t ptep_clear_flush(st
  
        if (mm_has_pgste(vma->vm_mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
 +              pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
        }
  
        pte = *ptep;
 -      __ptep_ipte(address, ptep);
 +      ptep_flush_direct(vma->vm_mm, address, ptep);
        pte_val(*ptep) = _PAGE_INVALID;
  
        if (mm_has_pgste(vma->vm_mm)) {
 +              if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
 +                  _PGSTE_GPS_USAGE_UNUSED)
 +                      pte_val(pte) |= _PAGE_UNUSED;
                pgste = pgste_update_all(&pte, pgste);
                pgste_set_unlock(ptep, pgste);
        }
@@@ -1244,7 -1211,7 +1246,7 @@@ static inline pte_t ptep_get_and_clear_
  
        if (!full && mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 +              pgste = pgste_ipte_notify(mm, ptep, pgste);
        }
  
        pte = *ptep;
@@@ -1269,7 -1236,7 +1271,7 @@@ static inline pte_t ptep_set_wrprotect(
        if (pte_write(pte)) {
                if (mm_has_pgste(mm)) {
                        pgste = pgste_get_lock(ptep);
 -                      pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 +                      pgste = pgste_ipte_notify(mm, ptep, pgste);
                }
  
                ptep_flush_lazy(mm, address, ptep);
@@@ -1295,10 -1262,10 +1297,10 @@@ static inline int ptep_set_access_flags
                return 0;
        if (mm_has_pgste(vma->vm_mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
 +              pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
        }
  
 -      __ptep_ipte(address, ptep);
 +      ptep_flush_direct(vma->vm_mm, address, ptep);
  
        if (mm_has_pgste(vma->vm_mm)) {
                pgste_set_pte(ptep, entry);
@@@ -1482,16 -1449,12 +1484,16 @@@ static inline pmd_t pmd_mkwrite(pmd_t p
  static inline void pmdp_flush_lazy(struct mm_struct *mm,
                                   unsigned long address, pmd_t *pmdp)
  {
 -      int active = (mm == current->active_mm) ? 1 : 0;
 +      int active, count;
  
 -      if ((atomic_read(&mm->context.attach_count) & 0xffff) > active)
 -              __pmd_idte(address, pmdp);
 -      else
 +      active = (mm == current->active_mm) ? 1 : 0;
 +      count = atomic_add_return(0x10000, &mm->context.attach_count);
 +      if ((count & 0xffff) <= active) {
 +              pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
                mm->context.flush_mm = 1;
 +      } else
 +              __pmd_idte(address, pmdp);
 +      atomic_sub(0x10000, &mm->context.attach_count);
  }
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --combined arch/s390/kernel/irq.c
index a770be97db4da7c513e7952d0360c85a5d5154bb,c288ef7e47b427211e129d7d98dfeebba518a57b..d42b14cc72a4516efa4c976a8e84ad47b97cbaa2
@@@ -18,7 -18,6 +18,7 @@@
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/cpu.h>
 +#include <linux/irq.h>
  #include <asm/irq_regs.h>
  #include <asm/cputime.h>
  #include <asm/lowcore.h>
@@@ -85,6 -84,7 +85,7 @@@ static const struct irq_class irqclass_
        [IRQIO_PCI]  = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
        [IRQIO_MSI]  = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
        [IRQIO_VIR]  = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+       [IRQIO_VAI]  = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
        [NMI_NMI]    = {.name = "NMI", .desc = "[NMI] Machine Check"},
        [CPU_RST]    = {.name = "RST", .desc = "[CPU] CPU Restart"},
  };
diff --combined arch/s390/kvm/diag.c
index 6f9cfa50037246d8d37bb5dd81a21d643ff3f544,bf9ed34c2bcd84af9a196f9a372c60de1c42a7d8..03a05ffb662f98d426302cffb4b08cb5cade7fac
  
  #include <linux/kvm.h>
  #include <linux/kvm_host.h>
 +#include <asm/pgalloc.h>
  #include <asm/virtio-ccw.h>
  #include "kvm-s390.h"
  #include "trace.h"
  #include "trace-s390.h"
+ #include "gaccess.h"
  
  static int diag_release_pages(struct kvm_vcpu *vcpu)
  {
        return 0;
  }
  
+ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+ {
+       struct prs_parm {
+               u16 code;
+               u16 subcode;
+               u16 parm_len;
+               u16 parm_version;
+               u64 token_addr;
+               u64 select_mask;
+               u64 compare_mask;
+               u64 zarch;
+       };
+       struct prs_parm parm;
+       int rc;
+       u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
+       u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+       unsigned long hva_token = KVM_HVA_ERR_BAD;
+       if (vcpu->run->s.regs.gprs[rx] & 7)
+               return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+       if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
+               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+               return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+       switch (parm.subcode) {
+       case 0: /* TOKEN */
+               if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+                       /*
+                        * If the pagefault handshake is already activated,
+                        * the token must not be changed.  We have to return
+                        * decimal 8 instead, as mandated in SC24-6084.
+                        */
+                       vcpu->run->s.regs.gprs[ry] = 8;
+                       return 0;
+               }
+               if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+                   parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+                       return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+               hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
+               if (kvm_is_error_hva(hva_token))
+                       return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+               vcpu->arch.pfault_token = parm.token_addr;
+               vcpu->arch.pfault_select = parm.select_mask;
+               vcpu->arch.pfault_compare = parm.compare_mask;
+               vcpu->run->s.regs.gprs[ry] = 0;
+               rc = 0;
+               break;
+       case 1: /*
+                * CANCEL
+                * Specification allows to let already pending tokens survive
+                * the cancel, therefore to reduce code complexity, we assume
+                * all outstanding tokens are already pending.
+                */
+               if (parm.token_addr || parm.select_mask ||
+                   parm.compare_mask || parm.zarch)
+                       return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+               vcpu->run->s.regs.gprs[ry] = 0;
+               /*
+                * If the pfault handling was not established or is already
+                * canceled SC24-6084 requests to return decimal 4.
+                */
+               if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+                       vcpu->run->s.regs.gprs[ry] = 4;
+               else
+                       vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+               rc = 0;
+               break;
+       default:
+               rc = -EOPNOTSUPP;
+               break;
+       }
+       return rc;
+ }
  static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
  {
        VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
@@@ -87,11 -168,9 +169,11 @@@ static int __diag_ipl_functions(struct 
        switch (subcode) {
        case 3:
                vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
 +              page_table_reset_pgste(current->mm, 0, TASK_SIZE);
                break;
        case 4:
                vcpu->run->s390_reset_flags = 0;
 +              page_table_reset_pgste(current->mm, 0, TASK_SIZE);
                break;
        default:
                return -EOPNOTSUPP;
@@@ -153,6 -232,8 +235,8 @@@ int kvm_s390_handle_diag(struct kvm_vcp
                return __diag_time_slice_end(vcpu);
        case 0x9c:
                return __diag_time_slice_end_directed(vcpu);
+       case 0x258:
+               return __diag_page_ref_service(vcpu);
        case 0x308:
                return __diag_ipl_functions(vcpu);
        case 0x500:
diff --combined arch/s390/kvm/kvm-s390.c
index 10b5db3c9bc4a71d179ed02b994d7fdea6109311,6e1b990e427fcc79e2589dbf25566671c51af815..b3ecb8f5b6ce2bcefb4fe92a64b99d2012cdd770
@@@ -68,7 -68,6 +68,7 @@@ struct kvm_stats_debugfs_item debugfs_e
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
 +      { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
@@@ -153,11 -152,14 +153,14 @@@ int kvm_dev_ioctl_check_extension(long 
  #ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
  #endif
+       case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
+       case KVM_CAP_DEVICE_CTRL:
+       case KVM_CAP_ENABLE_CAP_VM:
                r = 1;
                break;
        case KVM_CAP_NR_VCPUS:
@@@ -186,6 -188,25 +189,25 @@@ int kvm_vm_ioctl_get_dirty_log(struct k
        return 0;
  }
  
+ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+ {
+       int r;
+       if (cap->flags)
+               return -EINVAL;
+       switch (cap->cap) {
+       case KVM_CAP_S390_IRQCHIP:
+               kvm->arch.use_irqchip = 1;
+               r = 0;
+               break;
+       default:
+               r = -EINVAL;
+               break;
+       }
+       return r;
+ }
  long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
  {
                r = kvm_s390_inject_vm(kvm, &s390int);
                break;
        }
+       case KVM_ENABLE_CAP: {
+               struct kvm_enable_cap cap;
+               r = -EFAULT;
+               if (copy_from_user(&cap, argp, sizeof(cap)))
+                       break;
+               r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+               break;
+       }
+       case KVM_CREATE_IRQCHIP: {
+               struct kvm_irq_routing_entry routing;
+               r = -EINVAL;
+               if (kvm->arch.use_irqchip) {
+                       /* Set up dummy routing. */
+                       memset(&routing, 0, sizeof(routing));
+                       kvm_set_irq_routing(kvm, &routing, 0, 0);
+                       r = 0;
+               }
+               break;
+       }
        default:
                r = -ENOTTY;
        }
@@@ -214,6 -255,7 +256,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
  {
        int rc;
        char debug_name[16];
+       static unsigned long sca_offset;
  
        rc = -EINVAL;
  #ifdef CONFIG_KVM_S390_UCONTROL
        kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
        if (!kvm->arch.sca)
                goto out_err;
+       spin_lock(&kvm_lock);
+       sca_offset = (sca_offset + 16) & 0x7f0;
+       kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
+       spin_unlock(&kvm_lock);
  
        sprintf(debug_name, "kvm-%u", current->pid);
  
                if (!kvm->arch.gmap)
                        goto out_nogmap;
                kvm->arch.gmap->private = kvm;
+               kvm->arch.gmap->pfault_enabled = 0;
        }
  
        kvm->arch.css_support = 0;
+       kvm->arch.use_irqchip = 0;
  
        return 0;
  out_nogmap:
@@@ -272,6 -320,7 +321,7 @@@ void kvm_arch_vcpu_destroy(struct kvm_v
  {
        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+       kvm_clear_async_pf_completion_queue(vcpu);
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                clear_bit(63 - vcpu->vcpu_id,
                          (unsigned long *) &vcpu->kvm->arch.sca->mcn);
        if (kvm_is_ucontrol(vcpu->kvm))
                gmap_free(vcpu->arch.gmap);
  
 +      if (vcpu->arch.sie_block->cbrlo)
 +              __free_page(__pfn_to_page(
 +                              vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
        free_page((unsigned long)(vcpu->arch.sie_block));
 +
        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, vcpu);
  }
@@@ -320,11 -365,14 +370,14 @@@ void kvm_arch_destroy_vm(struct kvm *kv
        debug_unregister(kvm->arch.dbf);
        if (!kvm_is_ucontrol(kvm))
                gmap_free(kvm->arch.gmap);
+       kvm_s390_destroy_adapters(kvm);
  }
  
  /* Section: vcpu related */
  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
  {
+       vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+       kvm_clear_async_pf_completion_queue(vcpu);
        if (kvm_is_ucontrol(vcpu->kvm)) {
                vcpu->arch.gmap = gmap_alloc(current->mm);
                if (!vcpu->arch.gmap)
@@@ -385,7 -433,11 +438,11 @@@ static void kvm_s390_vcpu_initial_reset
        vcpu->arch.guest_fpregs.fpc = 0;
        asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
        vcpu->arch.sie_block->gbea = 1;
+       vcpu->arch.sie_block->pp = 0;
+       vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+       kvm_clear_async_pf_completion_queue(vcpu);
        atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+       kvm_s390_clear_local_irqs(vcpu);
  }
  
  int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
  
  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
  {
 +      struct page *cbrl;
 +
        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
                                                    CPUSTAT_STOPPED |
        vcpu->arch.sie_block->ecb2  = 8;
        vcpu->arch.sie_block->eca   = 0xC1002001U;
        vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
 +      if (kvm_enabled_cmma()) {
 +              cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
 +              if (cbrl) {
 +                      vcpu->arch.sie_block->ecb2 |= 0x80;
 +                      vcpu->arch.sie_block->ecb2 &= ~0x08;
 +                      vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
 +              }
 +      }
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
        tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
                     (unsigned long) vcpu);
@@@ -466,11 -508,8 +523,8 @@@ struct kvm_vcpu *kvm_arch_vcpu_create(s
        spin_lock_init(&vcpu->arch.local_int.lock);
        INIT_LIST_HEAD(&vcpu->arch.local_int.list);
        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
-       spin_lock(&kvm->arch.float_int.lock);
-       kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
        vcpu->arch.local_int.wq = &vcpu->wq;
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
-       spin_unlock(&kvm->arch.float_int.lock);
  
        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
@@@ -490,9 -529,7 +544,7 @@@ out
  
  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
  {
-       /* kvm common code refers to this, but never calls it */
-       BUG();
-       return 0;
+       return kvm_cpu_has_interrupt(vcpu);
  }
  
  void s390_vcpu_block(struct kvm_vcpu *vcpu)
@@@ -568,6 -605,26 +620,26 @@@ static int kvm_arch_vcpu_ioctl_get_one_
                r = put_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
+       case KVM_REG_S390_PFTOKEN:
+               r = put_user(vcpu->arch.pfault_token,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PFCOMPARE:
+               r = put_user(vcpu->arch.pfault_compare,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PFSELECT:
+               r = put_user(vcpu->arch.pfault_select,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PP:
+               r = put_user(vcpu->arch.sie_block->pp,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_GBEA:
+               r = put_user(vcpu->arch.sie_block->gbea,
+                            (u64 __user *)reg->addr);
+               break;
        default:
                break;
        }
@@@ -597,6 -654,26 +669,26 @@@ static int kvm_arch_vcpu_ioctl_set_one_
                r = get_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
+       case KVM_REG_S390_PFTOKEN:
+               r = get_user(vcpu->arch.pfault_token,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PFCOMPARE:
+               r = get_user(vcpu->arch.pfault_compare,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PFSELECT:
+               r = get_user(vcpu->arch.pfault_select,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PP:
+               r = get_user(vcpu->arch.sie_block->pp,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_GBEA:
+               r = get_user(vcpu->arch.sie_block->gbea,
+                            (u64 __user *)reg->addr);
+               break;
        default:
                break;
        }
@@@ -715,10 -792,100 +807,100 @@@ static int kvm_s390_handle_requests(str
        return 0;
  }
  
+ static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
+ {
+       long rc;
+       hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+       struct mm_struct *mm = current->mm;
+       down_read(&mm->mmap_sem);
+       rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
+       up_read(&mm->mmap_sem);
+       return rc;
+ }
+ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+                                     unsigned long token)
+ {
+       struct kvm_s390_interrupt inti;
+       inti.parm64 = token;
+       if (start_token) {
+               inti.type = KVM_S390_INT_PFAULT_INIT;
+               WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+       } else {
+               inti.type = KVM_S390_INT_PFAULT_DONE;
+               WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+       }
+ }
+ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+                                    struct kvm_async_pf *work)
+ {
+       trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+       __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+ }
+ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+                                struct kvm_async_pf *work)
+ {
+       trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+       __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+ }
+ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+                              struct kvm_async_pf *work)
+ {
+       /* s390 will always inject the page directly */
+ }
+ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+ {
+       /*
+        * s390 will always inject the page directly,
+        * but we still want check_async_completion to cleanup
+        */
+       return true;
+ }
+ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+ {
+       hva_t hva;
+       struct kvm_arch_async_pf arch;
+       int rc;
+       if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+               return 0;
+       if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+           vcpu->arch.pfault_compare)
+               return 0;
+       if (psw_extint_disabled(vcpu))
+               return 0;
+       if (kvm_cpu_has_interrupt(vcpu))
+               return 0;
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+               return 0;
+       if (!vcpu->arch.gmap->pfault_enabled)
+               return 0;
+       hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+       if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
+               return 0;
+       rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+       return rc;
+ }
  static int vcpu_pre_run(struct kvm_vcpu *vcpu)
  {
        int rc, cpuflags;
  
+       /*
+        * On s390 notifications for arriving pages will be delivered directly
+        * to the guest but the house keeping for completed pfaults is
+        * handled outside the worker.
+        */
+       kvm_check_async_pf_completion(vcpu);
        memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
  
        if (need_resched())
  
  static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
  {
-       int rc;
+       int rc = -1;
  
        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
                                                current->thread.gmap_addr;
                vcpu->run->s390_ucontrol.pgm_code = 0x10;
                rc = -EREMOTE;
-       } else {
+       } else if (current->thread.gmap_pfault) {
+               trace_kvm_s390_major_guest_pfault(vcpu);
+               current->thread.gmap_pfault = 0;
+               if (kvm_arch_setup_async_pf(vcpu) ||
+                   (kvm_arch_fault_in_sync(vcpu) >= 0))
+                       rc = 0;
+       }
+       if (rc == -1) {
                VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
                trace_kvm_s390_sie_fault(vcpu);
                rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
  
        if (rc == 0) {
                if (kvm_is_ucontrol(vcpu->kvm))
-                       rc = -EOPNOTSUPP;
+                       /* Don't exit for host interrupts. */
+                       rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
                else
                        rc = kvm_handle_sie_intercept(vcpu);
        }
        return rc;
  }
  
 +bool kvm_enabled_cmma(void)
 +{
 +      if (!MACHINE_IS_LPAR)
 +              return false;
 +      /* only enable for z10 and later */
 +      if (!MACHINE_HAS_EDAT1)
 +              return false;
 +      return true;
 +}
 +
  static int __vcpu_run(struct kvm_vcpu *vcpu)
  {
        int rc, exit_reason;
@@@ -831,8 -998,6 +1023,6 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
  
        atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
  
-       BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
        switch (kvm_run->exit_reason) {
        case KVM_EXIT_S390_SIEIC:
        case KVM_EXIT_UNKNOWN:
diff --combined arch/s390/kvm/kvm-s390.h
index 564514f410f45682272bdc5a3e5064306a9e9960,660e79f8f8e8db52a3296df9dbbe5b950339519a..3c1e2274d9eae858fce363cd5f89ddb699e1fa05
@@@ -129,6 -129,7 +129,7 @@@ enum hrtimer_restart kvm_s390_idle_wake
  void kvm_s390_tasklet(unsigned long parm);
  void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
  void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
+ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
  int __must_check kvm_s390_inject_vm(struct kvm *kvm,
                                    struct kvm_s390_interrupt *s390int);
  int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
  int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
  struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                    u64 cr6, u64 schid);
+ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
  
  /* implemented in priv.c */
  int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@@ -156,9 -158,12 +158,14 @@@ void s390_vcpu_block(struct kvm_vcpu *v
  void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
  void exit_sie(struct kvm_vcpu *vcpu);
  void exit_sie_sync(struct kvm_vcpu *vcpu);
 +/* are we going to support cmma? */
 +bool kvm_enabled_cmma(void);
  /* implemented in diag.c */
  int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
  
+ /* implemented in interrupt.c */
+ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+ int psw_extint_disabled(struct kvm_vcpu *vcpu);
+ void kvm_s390_destroy_adapters(struct kvm *kvm);
  #endif
diff --combined arch/s390/kvm/priv.c
index aacb6b129914bc1c7d207d0587fc4fb2efe66ccf,ae9e8ee2155705051e42043cce43e18386acafcb..476e9e218f43ee5cfa2842951c845a0499c4834e
@@@ -396,15 -396,10 +396,10 @@@ static int handle_stidp(struct kvm_vcp
  
  static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
  {
-       struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
        int cpus = 0;
        int n;
  
-       spin_lock(&fi->lock);
-       for (n = 0; n < KVM_MAX_VCPUS; n++)
-               if (fi->local_int[n])
-                       cpus++;
-       spin_unlock(&fi->lock);
+       cpus = atomic_read(&vcpu->kvm->online_vcpus);
  
        /* deal with other level 3 hypervisors */
        if (stsi(mem, 3, 2, 2))
@@@ -636,49 -631,8 +631,49 @@@ static int handle_pfmf(struct kvm_vcpu 
        return 0;
  }
  
 +static int handle_essa(struct kvm_vcpu *vcpu)
 +{
 +      /* entries expected to be 0x1ff */
 +      int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
 +      unsigned long *cbrlo, cbrle;
 +      struct gmap *gmap;
 +      int i;
 +
 +      VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
 +      gmap = vcpu->arch.gmap;
 +      vcpu->stat.instruction_essa++;
 +      if (!kvm_enabled_cmma() || !vcpu->arch.sie_block->cbrlo)
 +              return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 +
 +      if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 +              return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 +
 +      if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
 +              return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 +
 +      /* Rewind PSW to repeat the ESSA instruction */
 +      vcpu->arch.sie_block->gpsw.addr =
 +              __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
 +      vcpu->arch.sie_block->cbrlo &= PAGE_MASK;       /* reset nceo */
 +      cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
 +      down_read(&gmap->mm->mmap_sem);
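 +      /* walk the guest's list of released pages and try to free their backing */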
 +      for (i = 0; i < entries; ++i) {
 +              cbrle = cbrlo[i];
 +              if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE))
 +                      /* invalid entry */
 +                      break;
 +              /* try to free backing */
 +              __gmap_zap(cbrle, gmap);
 +      }
 +      up_read(&gmap->mm->mmap_sem);
 +      if (i < entries)
 +              return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 +      return 0;
 +}
 +
  static const intercept_handler_t b9_handlers[256] = {
        [0x8d] = handle_epsw,
 +      [0xab] = handle_essa,
        [0xaf] = handle_pfmf,
  };
  
index 6c1d7411eb009a5a96ef55004fdedf60aa1ef822,dcd047b629ec044655ff5963f00aa60134e655de..d949ef28c48bd9c423c5c668a1b0ce61ed70cac5
@@@ -6,16 -6,15 +6,18 @@@
  
  #define XSTATE_CPUID          0x0000000d
  
 -#define XSTATE_FP     0x1
 -#define XSTATE_SSE    0x2
 -#define XSTATE_YMM    0x4
 -#define XSTATE_BNDREGS        0x8
 -#define XSTATE_BNDCSR 0x10
 +#define XSTATE_FP             0x1
 +#define XSTATE_SSE            0x2
 +#define XSTATE_YMM            0x4
 +#define XSTATE_BNDREGS                0x8
 +#define XSTATE_BNDCSR         0x10
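 +/* AVX-512 state components */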
 +#define XSTATE_OPMASK         0x20
 +#define XSTATE_ZMM_Hi256      0x40
 +#define XSTATE_Hi16_ZMM               0x80
  
  #define XSTATE_FPSSE  (XSTATE_FP | XSTATE_SSE)
+ /* Bit 63 of XCR0 is reserved for future expansion */
+ #define XSTATE_EXTEND_MASK    (~(XSTATE_FPSSE | (1ULL << 63)))
  
  #define FXSAVE_SIZE   512
  
@@@ -26,8 -25,7 +28,8 @@@
  #define XSAVE_YMM_OFFSET    (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
  
  /* Supported features which support lazy state saving */
 -#define XSTATE_LAZY   (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
 +#define XSTATE_LAZY   (XSTATE_FP | XSTATE_SSE | XSTATE_YMM                  \
 +                      | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
  
  /* Supported features which require eager state saving */
  #define XSTATE_EAGER  (XSTATE_BNDREGS | XSTATE_BNDCSR)
index 4924f4be2b992198995a3bf328f011de65dc2230,ed821ed45eb613c14abae840ea8b51114f8b87e0..c827ace3121bc0f7ff3dc9d4cc74024207adc68c
  #define MSR_SMI_COUNT                 0x00000034
  #define MSR_IA32_FEATURE_CONTROL        0x0000003a
  #define MSR_IA32_TSC_ADJUST             0x0000003b
+ #define MSR_IA32_BNDCFGS              0x00000d90
  
  #define FEATURE_CONTROL_LOCKED                                (1<<0)
  #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX      (1<<1)
  #define THERM_LOG_THRESHOLD1           (1 << 9)
  
  /* MISC_ENABLE bits: architectural */
 -#define MSR_IA32_MISC_ENABLE_FAST_STRING      (1ULL << 0)
 -#define MSR_IA32_MISC_ENABLE_TCC              (1ULL << 1)
 -#define MSR_IA32_MISC_ENABLE_EMON             (1ULL << 7)
 -#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL      (1ULL << 11)
 -#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL     (1ULL << 12)
 -#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP       (1ULL << 16)
 -#define MSR_IA32_MISC_ENABLE_MWAIT            (1ULL << 18)
 -#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID      (1ULL << 22)
 -#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE     (1ULL << 23)
 -#define MSR_IA32_MISC_ENABLE_XD_DISABLE               (1ULL << 34)
 +#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT          0
 +#define MSR_IA32_MISC_ENABLE_FAST_STRING              (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
 +#define MSR_IA32_MISC_ENABLE_TCC_BIT                  1
 +#define MSR_IA32_MISC_ENABLE_TCC                      (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
 +#define MSR_IA32_MISC_ENABLE_EMON_BIT                 7
 +#define MSR_IA32_MISC_ENABLE_EMON                     (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
 +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT          11
 +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL              (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
 +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT         12
 +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL             (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
 +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT   16
 +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP               (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
 +#define MSR_IA32_MISC_ENABLE_MWAIT_BIT                        18
 +#define MSR_IA32_MISC_ENABLE_MWAIT                    (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
 +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT          22
 +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID              (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
 +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT         23
 +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE             (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT           34
 +#define MSR_IA32_MISC_ENABLE_XD_DISABLE                       (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
  
  /* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
 -#define MSR_IA32_MISC_ENABLE_X87_COMPAT               (1ULL << 2)
 -#define MSR_IA32_MISC_ENABLE_TM1              (1ULL << 3)
 -#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE       (1ULL << 4)
 -#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE  (1ULL << 6)
 -#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK    (1ULL << 8)
 -#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9)
 -#define MSR_IA32_MISC_ENABLE_FERR             (1ULL << 10)
 -#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX   (1ULL << 10)
 -#define MSR_IA32_MISC_ENABLE_TM2              (1ULL << 13)
 -#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19)
 -#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK   (1ULL << 20)
 -#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT      (1ULL << 24)
 -#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37)
 -#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE    (1ULL << 38)
 -#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE  (1ULL << 39)
 +#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT           2
 +#define MSR_IA32_MISC_ENABLE_X87_COMPAT                       (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
 +#define MSR_IA32_MISC_ENABLE_TM1_BIT                  3
 +#define MSR_IA32_MISC_ENABLE_TM1                      (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
 +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT   4
 +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE               (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT      6
 +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE          (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT                8
 +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK            (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
 +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT     9
 +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE         (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_FERR_BIT                 10
 +#define MSR_IA32_MISC_ENABLE_FERR                     (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
 +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT               10
 +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX           (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
 +#define MSR_IA32_MISC_ENABLE_TM2_BIT                  13
 +#define MSR_IA32_MISC_ENABLE_TM2                      (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
 +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT     19
 +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE         (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT               20
 +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK           (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
 +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT          24
 +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT              (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
 +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT     37
 +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE         (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT                38
 +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE            (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT      39
 +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE          (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
  
  #define MSR_IA32_TSC_DEADLINE         0x000006E0
  
diff --combined arch/x86/kvm/cpuid.c
index e5503d8aec1dac41f6ff7f97eb77e43beee6f38b,64fae65730f3a536f8586c9d0de26395c0c283db..bea60671ef8a8c17227e4c4124c0f52a7c8c6f63
@@@ -28,7 -28,7 +28,7 @@@ static u32 xstate_required_size(u64 xst
        int feature_bit = 0;
        u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
  
-       xstate_bv &= ~XSTATE_FPSSE;
+       xstate_bv &= XSTATE_EXTEND_MASK;
        while (xstate_bv) {
                if (xstate_bv & 0x1) {
                        u32 eax, ebx, ecx, edx;
        return ret;
  }
  
+ u64 kvm_supported_xcr0(void)
+ {
+       u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
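+       /* only expose MPX state if the vendor module reports MPX support */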
+       if (!kvm_x86_ops->mpx_supported())
+               xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+       return xcr0;
+ }
  void kvm_update_cpuid(struct kvm_vcpu *vcpu)
  {
        struct kvm_cpuid_entry2 *best;
@@@ -73,9 -83,9 +83,9 @@@
        } else {
                vcpu->arch.guest_supported_xcr0 =
                        (best->eax | ((u64)best->edx << 32)) &
-                       host_xcr0 & KVM_SUPPORTED_XCR0;
-               vcpu->arch.guest_xstate_size =
-                       xstate_required_size(vcpu->arch.guest_supported_xcr0);
+                       kvm_supported_xcr0();
+               vcpu->arch.guest_xstate_size = best->ebx =
+                       xstate_required_size(vcpu->arch.xcr0);
        }
  
        kvm_pmu_cpuid_update(vcpu);
@@@ -210,13 -220,6 +220,6 @@@ static void do_cpuid_1_ent(struct kvm_c
        entry->flags = 0;
  }
  
- static bool supported_xcr0_bit(unsigned bit)
- {
-       u64 mask = ((u64)1 << bit);
-       return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
- }
  #define F(x) bit(X86_FEATURE_##x)
  
  static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
@@@ -256,6 -259,7 +259,7 @@@ static inline int __do_cpuid_ent(struc
  #endif
        unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
        unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
+       unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
  
        /* cpuid 1.edx */
        const u32 kvm_supported_word0_x86_features =
                F(TSC) | F(MSR) | F(PAE) | F(MCE) |
                F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
 -              F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
 +              F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
                0 /* Reserved, DS, ACPI */ | F(MMX) |
                F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
                0 /* HTT, TM, Reserved, PBE */;
        /* cpuid 7.0.ebx */
        const u32 kvm_supported_word9_x86_features =
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-               F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
+               F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
+               F(ADX);
  
        /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();
        }
        case 0xd: {
                int idx, i;
+               u64 supported = kvm_supported_xcr0();
  
-               entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
-               entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
+               entry->eax &= supported;
+               entry->edx &= supported >> 32;
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                for (idx = 1, i = 1; idx < 64; ++idx) {
+                       u64 mask = ((u64)1 << idx);
                        if (*nent >= maxnent)
                                goto out;
  
                        do_cpuid_1_ent(&entry[i], function, idx);
-                       if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+                       if (entry[i].eax == 0 || !(supported & mask))
                                continue;
                        entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
diff --combined arch/x86/kvm/svm.c
index 2de1bc09a8d40a0508e7e364bc1de301215cc7c5,2136cb6ab1327a2ef645eb6451324b2ba8280a91..7f4f9c2badaefdf880b999fed48274748a808fd7
@@@ -34,6 -34,7 +34,7 @@@
  #include <asm/perf_event.h>
  #include <asm/tlbflush.h>
  #include <asm/desc.h>
+ #include <asm/debugreg.h>
  #include <asm/kvm_para.h>
  
  #include <asm/virtext.h>
@@@ -303,20 -304,35 +304,35 @@@ static inline bool is_cr_intercept(stru
        return vmcb->control.intercept_cr & (1U << bit);
  }
  
- static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
+ static inline void set_dr_intercepts(struct vcpu_svm *svm)
  {
        struct vmcb *vmcb = get_host_vmcb(svm);
  
-       vmcb->control.intercept_dr |= (1U << bit);
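+       /* intercept all reads and writes of DR0-DR7 */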
+       vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
+               | (1 << INTERCEPT_DR1_READ)
+               | (1 << INTERCEPT_DR2_READ)
+               | (1 << INTERCEPT_DR3_READ)
+               | (1 << INTERCEPT_DR4_READ)
+               | (1 << INTERCEPT_DR5_READ)
+               | (1 << INTERCEPT_DR6_READ)
+               | (1 << INTERCEPT_DR7_READ)
+               | (1 << INTERCEPT_DR0_WRITE)
+               | (1 << INTERCEPT_DR1_WRITE)
+               | (1 << INTERCEPT_DR2_WRITE)
+               | (1 << INTERCEPT_DR3_WRITE)
+               | (1 << INTERCEPT_DR4_WRITE)
+               | (1 << INTERCEPT_DR5_WRITE)
+               | (1 << INTERCEPT_DR6_WRITE)
+               | (1 << INTERCEPT_DR7_WRITE);
  
        recalc_intercepts(svm);
  }
  
- static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
+ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
  {
        struct vmcb *vmcb = get_host_vmcb(svm);
  
-       vmcb->control.intercept_dr &= ~(1U << bit);
+       vmcb->control.intercept_dr = 0;
  
        recalc_intercepts(svm);
  }
@@@ -1080,23 -1096,7 +1096,7 @@@ static void init_vmcb(struct vcpu_svm *
        set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
        set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
  
-       set_dr_intercept(svm, INTERCEPT_DR0_READ);
-       set_dr_intercept(svm, INTERCEPT_DR1_READ);
-       set_dr_intercept(svm, INTERCEPT_DR2_READ);
-       set_dr_intercept(svm, INTERCEPT_DR3_READ);
-       set_dr_intercept(svm, INTERCEPT_DR4_READ);
-       set_dr_intercept(svm, INTERCEPT_DR5_READ);
-       set_dr_intercept(svm, INTERCEPT_DR6_READ);
-       set_dr_intercept(svm, INTERCEPT_DR7_READ);
-       set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
+       set_dr_intercepts(svm);
  
        set_exception_intercept(svm, PF_VECTOR);
        set_exception_intercept(svm, UD_VECTOR);
@@@ -1684,6 -1684,21 +1684,21 @@@ static void svm_set_dr6(struct kvm_vcp
        mark_dirty(svm->vmcb, VMCB_DR);
  }
  
+ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
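+
+       /*
+        * The guest was running with debug register intercepts cleared;
+        * read back the values it left behind and re-enable the intercepts.
+        */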
+       get_debugreg(vcpu->arch.db[0], 0);
+       get_debugreg(vcpu->arch.db[1], 1);
+       get_debugreg(vcpu->arch.db[2], 2);
+       get_debugreg(vcpu->arch.db[3], 3);
+       vcpu->arch.dr6 = svm_get_dr6(vcpu);
+       vcpu->arch.dr7 = svm->vmcb->save.dr7;
+       vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
+       set_dr_intercepts(svm);
+ }
  static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
@@@ -2842,6 -2857,7 +2857,7 @@@ static int iret_interception(struct vcp
        clr_intercept(svm, INTERCEPT_IRET);
        svm->vcpu.arch.hflags |= HF_IRET_MASK;
        svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+       kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
        return 1;
  }
  
@@@ -2974,6 -2990,17 +2990,17 @@@ static int dr_interception(struct vcpu_
        unsigned long val;
        int err;
  
+       if (svm->vcpu.guest_debug == 0) {
+               /*
+                * No more DR vmexits; force a reload of the debug registers
+                * and reenter on this instruction.  The next vmexit will
+                * retrieve the full state of the debug registers.
+                */
+               clr_dr_intercepts(svm);
+               svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+               return 1;
+       }
        if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
                return emulate_on_interception(svm);
  
@@@ -3002,8 -3029,10 +3029,8 @@@ static int cr8_write_interception(struc
        u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
        /* instruction emulation calls kvm_set_cr8() */
        r = cr_interception(svm);
 -      if (irqchip_in_kernel(svm->vcpu.kvm)) {
 -              clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
 +      if (irqchip_in_kernel(svm->vcpu.kvm))
                return r;
 -      }
        if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
                return r;
        kvm_run->exit_reason = KVM_EXIT_SET_TPR;
@@@ -3565,8 -3594,6 +3592,8 @@@ static void update_cr8_intercept(struc
        if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
                return;
  
 +      clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
 +
        if (irr == -1)
                return;
  
@@@ -3649,7 -3676,7 +3676,7 @@@ static int svm_interrupt_allowed(struc
        return ret;
  }
  
- static int enable_irq_window(struct kvm_vcpu *vcpu)
+ static void enable_irq_window(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
  
                svm_set_vintr(svm);
                svm_inject_irq(svm, 0x0);
        }
-       return 0;
  }
  
- static int enable_nmi_window(struct kvm_vcpu *vcpu)
+ static void enable_nmi_window(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
  
        if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
            == HF_NMI_MASK)
-               return 0; /* IRET will cause a vm exit */
+               return; /* IRET will cause a vm exit */
  
        /*
         * Something prevents NMI from being injected. Single step over possible
        svm->nmi_singlestep = true;
        svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
        update_db_bp_intercept(vcpu);
-       return 0;
  }
  
  static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@@ -4064,6 -4089,11 +4089,11 @@@ static bool svm_invpcid_supported(void
        return false;
  }
  
+ static bool svm_mpx_supported(void)
+ {
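+       /* MPX is not available on AMD CPUs */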
+       return false;
+ }
  static bool svm_has_wbinvd_exit(void)
  {
        return true;
@@@ -4302,6 -4332,7 +4332,7 @@@ static struct kvm_x86_ops svm_x86_ops 
        .get_dr6 = svm_get_dr6,
        .set_dr6 = svm_set_dr6,
        .set_dr7 = svm_set_dr7,
+       .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
        .cache_reg = svm_cache_reg,
        .get_rflags = svm_get_rflags,
        .set_rflags = svm_set_rflags,
  
        .rdtscp_supported = svm_rdtscp_supported,
        .invpcid_supported = svm_invpcid_supported,
+       .mpx_supported = svm_mpx_supported,
  
        .set_supported_cpuid = svm_set_supported_cpuid,
  
diff --combined virt/kvm/kvm_main.c
index b5ec7fb986f6a560a258c99b8af3977aff936c2d,5fd4cf8e8888585033d44cf113d2931b4de08572..56baae8c2f56baf0f41bb7bb24b0b12679267d20
@@@ -102,7 -102,7 +102,7 @@@ static void kvm_release_pfn_dirty(pfn_
  static void mark_page_dirty_in_slot(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot, gfn_t gfn);
  
 -bool kvm_rebooting;
 +__visible bool kvm_rebooting;
  EXPORT_SYMBOL_GPL(kvm_rebooting);
  
  static bool largepages_enabled = true;
@@@ -186,12 -186,9 +186,9 @@@ static bool make_all_cpus_request(struc
  
  void kvm_flush_remote_tlbs(struct kvm *kvm)
  {
-       long dirty_count = kvm->tlbs_dirty;
-       smp_mb();
        if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.remote_tlb_flush;
-       cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
+       kvm->tlbs_dirty = false;
  }
  EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
  
@@@ -1804,7 -1801,7 +1801,7 @@@ void kvm_vcpu_on_spin(struct kvm_vcpu *
                                continue;
                        if (vcpu == me)
                                continue;
-                       if (waitqueue_active(&vcpu->wq))
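+                       /* only skip a waiting vcpu if it has nothing pending to run */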
+                       if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
                                continue;
                        if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
                                continue;
@@@ -2283,6 -2280,11 +2280,11 @@@ static int kvm_ioctl_create_device(stru
        case KVM_DEV_TYPE_ARM_VGIC_V2:
                ops = &kvm_arm_vgic_v2_ops;
                break;
+ #endif
+ #ifdef CONFIG_S390
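+       /* s390 floating interrupt controller (FLIC) */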
+       case KVM_DEV_TYPE_FLIC:
+               ops = &kvm_flic_ops;
+               break;
  #endif
        default:
                return -ENODEV;