Merge tag 'kvm-3.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author    Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 2 Apr 2014 21:50:10 +0000 (14:50 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 2 Apr 2014 21:50:10 +0000 (14:50 -0700)
Pull kvm updates from Paolo Bonzini:
 "PPC and ARM do not have much going on this time.  Most of the cool
  stuff, instead, is in s390 and (after a few releases) x86.

  ARM has some caching fixes and PPC has transactional memory support in
  guests.  MIPS has some fixes, with more probably coming in 3.16 as
  QEMU will soon get support for MIPS KVM.

  For x86 there are optimizations for debug registers, which trigger on
  some Windows games, and other important fixes for Windows guests.  We
  now expose to the guest Broadwell instruction set extensions and also
  Intel MPX.  There's also a fix/workaround for OS X guests, nested
  virtualization features (preemption timer), and a couple of kvmclock
  refinements.

  For s390, the main news is asynchronous page faults, together with
  improvements to IRQs (floating irqs and adapter irqs) that speed up
  virtio devices"

* tag 'kvm-3.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (96 commits)
  KVM: PPC: Book3S HV: Save/restore host PMU registers that are new in POWER8
  KVM: PPC: Book3S HV: Fix decrementer timeouts with non-zero TB offset
  KVM: PPC: Book3S HV: Don't use kvm_memslots() in real mode
  KVM: PPC: Book3S HV: Return ENODEV error rather than EIO
  KVM: PPC: Book3S: Trim top 4 bits of physical address in RTAS code
  KVM: PPC: Book3S HV: Add get/set_one_reg for new TM state
  KVM: PPC: Book3S HV: Add transactional memory support
  KVM: Specify byte order for KVM_EXIT_MMIO
  KVM: vmx: fix MPX detection
  KVM: PPC: Book3S HV: Fix KVM hang with CONFIG_KVM_XICS=n
  KVM: PPC: Book3S: Introduce hypervisor call H_GET_TCE
  KVM: PPC: Book3S HV: Fix incorrect userspace exit on ioeventfd write
  KVM: s390: clear local interrupts at cpu initial reset
  KVM: s390: Fix possible memory leak in SIGP functions
  KVM: s390: fix calculation of idle_mask array size
  KVM: s390: randomize sca address
  KVM: ioapic: reinject pending interrupts on KVM_SET_IRQCHIP
  KVM: Bump KVM_MAX_IRQ_ROUTES for s390
  KVM: s390: irq routing for adapter interrupts.
  KVM: s390: adapter interrupt sources
  ...
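
The MPX exposure mentioned in the x86 paragraph (and the "KVM: vmx: fix MPX detection" commit above) is consumed by userspace through the long-standing KVM_GET_SUPPORTED_CPUID ioctl.  A minimal sketch follows, assuming only the standard uapi headers; the helper name and the fixed entry count are invented for illustration, and a real VMM would retry on E2BIG.

/*
 * Hypothetical helper: query KVM_GET_SUPPORTED_CPUID on the /dev/kvm fd and
 * report whether MPX (CPUID.(EAX=7,ECX=0):EBX bit 14) can be exposed to the
 * guest.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdlib.h>
#include <stdbool.h>

static bool kvm_supports_mpx(int kvm_fd)
{
	int nent = 100;		/* arbitrary; retry on E2BIG in real code */
	struct kvm_cpuid2 *cpuid;
	bool mpx = false;
	int i;

	cpuid = calloc(1, sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2));
	if (!cpuid)
		return false;
	cpuid->nent = nent;

	if (ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid) < 0) {
		free(cpuid);
		return false;
	}

	for (i = 0; i < cpuid->nent; i++) {
		/* Leaf 7, subleaf 0: structured extended features. */
		if (cpuid->entries[i].function == 7 && cpuid->entries[i].index == 0)
			mpx = cpuid->entries[i].ebx & (1u << 14);
	}

	free(cpuid);
	return mpx;
}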

15 files changed:
arch/arm64/include/asm/kvm_arm.h
arch/powerpc/include/asm/reg.h
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/pgtable.h
arch/s390/kernel/irq.c
arch/s390/kvm/diag.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/x86/include/asm/xsave.h
arch/x86/include/uapi/asm/msr-index.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/svm.c
virt/kvm/kvm_main.c

diff --combined arch/arm64/include/asm/kvm_arm.h
index 21ef48d32ff271fbdccba7f2df710b021f9db7e0,00fbaa75dc7bcf55541c7bf00af609dfb63529a9..3d6903006a8aacf1b6b561db1d426958844665ef
@@@ -62,6 -62,7 +62,7 @@@
   * RW:                64bit by default, can be overriden for 32bit VMs
   * TAC:               Trap ACTLR
   * TSC:               Trap SMC
+  * TVM:               Trap VM ops (until M+C set in SCTLR_EL1)
   * TSW:               Trap cache operations by set/way
   * TWE:               Trap WFE
   * TWI:               Trap WFI
@@@ -74,7 -75,7 +75,7 @@@
   * SWIO:      Turn set/way invalidates into set/way clean+invalidate
   */
  #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
-                        HCR_BSU_IS | HCR_FB | HCR_TAC | \
+                        HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
                         HCR_AMO | HCR_IMO | HCR_FMO | \
                         HCR_SWIO | HCR_TIDCP | HCR_RW)
  #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
  
  /* VTCR_EL2 Registers bits */
  #define VTCR_EL2_PS_MASK      (7 << 16)
 -#define VTCR_EL2_PS_40B               (2 << 16)
  #define VTCR_EL2_TG0_MASK     (1 << 14)
  #define VTCR_EL2_TG0_4K               (0 << 14)
  #define VTCR_EL2_TG0_64K      (1 << 14)
   * 64kB pages (TG0 = 1)
   * 2 level page tables (SL = 1)
   */
 -#define VTCR_EL2_FLAGS                (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
 -                               VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
 -                               VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
 -                               VTCR_EL2_T0SZ_40B)
 +#define VTCR_EL2_FLAGS                (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
 +                               VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
 +                               VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
  #define VTTBR_X               (38 - VTCR_EL2_T0SZ_40B)
  #else
  /*
   * 4kB pages (TG0 = 0)
   * 3 level page tables (SL = 1)
   */
 -#define VTCR_EL2_FLAGS                (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
 -                               VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
 -                               VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
 -                               VTCR_EL2_T0SZ_40B)
 +#define VTCR_EL2_FLAGS                (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
 +                               VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
 +                               VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
  #define VTTBR_X               (37 - VTCR_EL2_T0SZ_40B)
  #endif
  
diff --combined arch/powerpc/include/asm/reg.h
index 1a36b8ede41736f91b305eeb3b1f3c1ac225abcd,ce17815b8b55a95dd38439bfea2bf95f8d1c98bc..0dcc48af25a302759a5c0aca3f46b5abd7fb2507
  #define SPRN_ACOP     0x1F    /* Available Coprocessor Register */
  #define SPRN_TFIAR    0x81    /* Transaction Failure Inst Addr   */
  #define SPRN_TEXASR   0x82    /* Transaction EXception & Summary */
+ #define   TEXASR_FS   __MASK(63-36)   /* Transaction Failure Summary */
  #define SPRN_TEXASRU  0x83    /* ''      ''      ''    Upper 32  */
  #define SPRN_TFHAR    0x80    /* Transaction Failure Handler Addr */
  #define SPRN_CTRLF    0x088
  #define SPRN_SPRG3    0x113   /* Special Purpose Register General 3 */
  #define SPRN_USPRG3   0x103   /* SPRG3 userspace read */
  #define SPRN_SPRG4    0x114   /* Special Purpose Register General 4 */
 +#define SPRN_USPRG4   0x104   /* SPRG4 userspace read */
  #define SPRN_SPRG5    0x115   /* Special Purpose Register General 5 */
 +#define SPRN_USPRG5   0x105   /* SPRG5 userspace read */
  #define SPRN_SPRG6    0x116   /* Special Purpose Register General 6 */
 +#define SPRN_USPRG6   0x106   /* SPRG6 userspace read */
  #define SPRN_SPRG7    0x117   /* Special Purpose Register General 7 */
 +#define SPRN_USPRG7   0x107   /* SPRG7 userspace read */
  #define SPRN_SRR0     0x01A   /* Save/Restore Register 0 */
  #define SPRN_SRR1     0x01B   /* Save/Restore Register 1 */
  #define   SRR1_ISI_NOPT               0x40000000 /* ISI: Not found in hash */
  #define   MMCR0_PMXE  0x04000000UL /* performance monitor exception enable */
  #define   MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */
  #define   MMCR0_TBEE  0x00400000UL /* time base exception enable */
 +#define   MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */
  #define   MMCR0_EBE   0x00100000UL /* Event based branch enable */
  #define   MMCR0_PMCC  0x000c0000UL /* PMC control */
  #define   MMCR0_PMCC_U6       0x00080000UL /* PMC1-6 are R/W by user (PR) */
  #define   MMCR0_PMC1CE        0x00008000UL /* PMC1 count enable*/
  #define   MMCR0_PMCjCE        0x00004000UL /* PMCj count enable*/
  #define   MMCR0_TRIGGER       0x00002000UL /* TRIGGER enable */
 +#define   MMCR0_PMAO_SYNC 0x00000800UL /* PMU interrupt is synchronous */
  #define   MMCR0_PMAO  0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */
  #define   MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */
  #define   MMCR0_FC56  0x00000010UL /* freeze counters 5 and 6 */
  #define SPRN_EBBHR    804     /* Event based branch handler register */
  #define SPRN_EBBRR    805     /* Event based branch return register */
  #define SPRN_BESCR    806     /* Branch event status and control register */
 +#define   BESCR_GE    0x8000000000000000ULL /* Global Enable */
  #define SPRN_WORT     895     /* Workload optimization register - thread */
  
  #define SPRN_PMC1     787
   * 64-bit embedded
   *    - SPRG0 generic exception scratch
   *    - SPRG2 TLB exception stack
 - *    - SPRG3 critical exception scratch and
 - *        CPU and NUMA node for VDSO getcpu (user visible)
 + *    - SPRG3 critical exception scratch (user visible, sorry!)
   *    - SPRG4 unused (user visible)
   *    - SPRG6 TLB miss scratch (user visible, sorry !)
 - *    - SPRG7 critical exception scratch
 + *    - SPRG7 CPU and NUMA node for VDSO getcpu (user visible)
   *    - SPRG8 machine check exception scratch
   *    - SPRG9 debug exception scratch
   *
  #define SPRN_SPRG_SCRATCH0    SPRN_SPRG2
  #define SPRN_SPRG_HPACA               SPRN_HSPRG0
  #define SPRN_SPRG_HSCRATCH0   SPRN_HSPRG1
 +#define SPRN_SPRG_VDSO_READ   SPRN_USPRG3
 +#define SPRN_SPRG_VDSO_WRITE  SPRN_SPRG3
  
  #define GET_PACA(rX)                                  \
        BEGIN_FTR_SECTION_NESTED(66);                   \
  #define SPRN_SPRG_TLB_SCRATCH SPRN_SPRG6
  #define SPRN_SPRG_GEN_SCRATCH SPRN_SPRG0
  #define SPRN_SPRG_GDBELL_SCRATCH SPRN_SPRG_GEN_SCRATCH
 +#define SPRN_SPRG_VDSO_READ   SPRN_USPRG7
 +#define SPRN_SPRG_VDSO_WRITE  SPRN_SPRG7
  
  #define SET_PACA(rX)  mtspr   SPRN_SPRG_PACA,rX
  #define GET_PACA(rX)  mfspr   rX,SPRN_SPRG_PACA
  #define PVR_8560      0x80200000
  #define PVR_VER_E500V1        0x8020
  #define PVR_VER_E500V2        0x8021
 +#define PVR_VER_E500MC        0x8023
 +#define PVR_VER_E5500 0x8024
  #define PVR_VER_E6500 0x8040
  
  /*
diff --combined arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 53d647f8e7413bd9117d860b74b503fff8b136c7,4963335198fa94524266ba977cf113de274b97dd..ffbb871c2bd803827fa5a78658f29d2fa8a1dbd6
@@@ -28,6 -28,9 +28,9 @@@
  #include <asm/exception-64s.h>
  #include <asm/kvm_book3s_asm.h>
  #include <asm/mmu-hash64.h>
+ #include <asm/tm.h>
+ #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
  
  #ifdef __LITTLE_ENDIAN__
  #error Need to fix lppaca and SLB shadow accesses in little endian mode
@@@ -75,8 -78,8 +78,8 @@@ BEGIN_FTR_SECTIO
  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
  
        /* Restore SPRG3 */
 -      ld      r3,PACA_SPRG3(r13)
 -      mtspr   SPRN_SPRG3,r3
 +      ld      r3,PACA_SPRG_VDSO(r13)
 +      mtspr   SPRN_SPRG_VDSO_WRITE,r3
  
        /* Reload the host's PMU registers */
        ld      r3, PACALPPACAPTR(r13)  /* is the host using the PMU? */
@@@ -106,8 -109,18 +109,18 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201
        ld      r3, HSTATE_MMCR(r13)
        ld      r4, HSTATE_MMCR + 8(r13)
        ld      r5, HSTATE_MMCR + 16(r13)
+       ld      r6, HSTATE_MMCR + 24(r13)
+       ld      r7, HSTATE_MMCR + 32(r13)
        mtspr   SPRN_MMCR1, r4
        mtspr   SPRN_MMCRA, r5
+       mtspr   SPRN_SIAR, r6
+       mtspr   SPRN_SDAR, r7
+ BEGIN_FTR_SECTION
+       ld      r8, HSTATE_MMCR + 40(r13)
+       ld      r9, HSTATE_MMCR + 48(r13)
+       mtspr   SPRN_MMCR2, r8
+       mtspr   SPRN_SIER, r9
+ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mtspr   SPRN_MMCR0, r3
        isync
  23:
@@@ -597,6 -610,116 +610,116 @@@ BEGIN_FTR_SECTIO
   END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
  
+ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ BEGIN_FTR_SECTION
+       b       skip_tm
+ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+       /* Turn on TM/FP/VSX/VMX so we can restore them. */
+       mfmsr   r5
+       li      r6, MSR_TM >> 32
+       sldi    r6, r6, 32
+       or      r5, r5, r6
+       ori     r5, r5, MSR_FP
+       oris    r5, r5, (MSR_VEC | MSR_VSX)@h
+       mtmsrd  r5
+       /*
+        * The user may change these outside of a transaction, so they must
+        * always be context switched.
+        */
+       ld      r5, VCPU_TFHAR(r4)
+       ld      r6, VCPU_TFIAR(r4)
+       ld      r7, VCPU_TEXASR(r4)
+       mtspr   SPRN_TFHAR, r5
+       mtspr   SPRN_TFIAR, r6
+       mtspr   SPRN_TEXASR, r7
+       ld      r5, VCPU_MSR(r4)
+       rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+       beq     skip_tm /* TM not active in guest */
+       /* Make sure the failure summary is set, otherwise we'll program check
+        * when we trechkpt.  It's possible that this might have been not set
+        * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+        * host.
+        */
+       oris    r7, r7, (TEXASR_FS)@h
+       mtspr   SPRN_TEXASR, r7
+       /*
+        * We need to load up the checkpointed state for the guest.
+        * We need to do this early as it will blow away any GPRs, VSRs and
+        * some SPRs.
+        */
+       mr      r31, r4
+       addi    r3, r31, VCPU_FPRS_TM
+       bl      .load_fp_state
+       addi    r3, r31, VCPU_VRS_TM
+       bl      .load_vr_state
+       mr      r4, r31
+       lwz     r7, VCPU_VRSAVE_TM(r4)
+       mtspr   SPRN_VRSAVE, r7
+       ld      r5, VCPU_LR_TM(r4)
+       lwz     r6, VCPU_CR_TM(r4)
+       ld      r7, VCPU_CTR_TM(r4)
+       ld      r8, VCPU_AMR_TM(r4)
+       ld      r9, VCPU_TAR_TM(r4)
+       mtlr    r5
+       mtcr    r6
+       mtctr   r7
+       mtspr   SPRN_AMR, r8
+       mtspr   SPRN_TAR, r9
+       /*
+        * Load up PPR and DSCR values but don't put them in the actual SPRs
+        * till the last moment to avoid running with userspace PPR and DSCR for
+        * too long.
+        */
+       ld      r29, VCPU_DSCR_TM(r4)
+       ld      r30, VCPU_PPR_TM(r4)
+       std     r2, PACATMSCRATCH(r13) /* Save TOC */
+       /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+       li      r5, 0
+       mtmsrd  r5, 1
+       /* Load GPRs r0-r28 */
+       reg = 0
+       .rept   29
+       ld      reg, VCPU_GPRS_TM(reg)(r31)
+       reg = reg + 1
+       .endr
+       mtspr   SPRN_DSCR, r29
+       mtspr   SPRN_PPR, r30
+       /* Load final GPRs */
+       ld      29, VCPU_GPRS_TM(29)(r31)
+       ld      30, VCPU_GPRS_TM(30)(r31)
+       ld      31, VCPU_GPRS_TM(31)(r31)
+       /* TM checkpointed state is now setup.  All GPRs are now volatile. */
+       TRECHKPT
+       /* Now let's get back the state we need. */
+       HMT_MEDIUM
+       GET_PACA(r13)
+       ld      r29, HSTATE_DSCR(r13)
+       mtspr   SPRN_DSCR, r29
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       ld      r1, HSTATE_HOST_R1(r13)
+       ld      r2, PACATMSCRATCH(r13)
+       /* Set the MSR RI since we have our registers back. */
+       li      r5, MSR_RI
+       mtmsrd  r5, 1
+ skip_tm:
+ #endif
        /* Load guest PMU registers */
        /* R4 is live here (vcpu pointer) */
        li      r3, 1
@@@ -704,14 -827,6 +827,6 @@@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S
        ld      r6, VCPU_VTB(r4)
        mtspr   SPRN_IC, r5
        mtspr   SPRN_VTB, r6
- #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-       ld      r5, VCPU_TFHAR(r4)
-       ld      r6, VCPU_TFIAR(r4)
-       ld      r7, VCPU_TEXASR(r4)
-       mtspr   SPRN_TFHAR, r5
-       mtspr   SPRN_TFIAR, r6
-       mtspr   SPRN_TEXASR, r7
- #endif
        ld      r8, VCPU_EBBHR(r4)
        mtspr   SPRN_EBBHR, r8
        ld      r5, VCPU_EBBRR(r4)
         * Set the decrementer to the guest decrementer.
         */
        ld      r8,VCPU_DEC_EXPIRES(r4)
+       /* r8 is a host timebase value here, convert to guest TB */
+       ld      r5,HSTATE_KVM_VCORE(r13)
+       ld      r6,VCORE_TB_OFFSET(r5)
+       add     r8,r8,r6
        mftb    r7
        subf    r3,r7,r8
        mtspr   SPRN_DEC,r3
@@@ -817,7 -936,8 +936,8 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206
  12:   mtspr   SPRN_SRR0, r10
        mr      r10,r0
        mtspr   SPRN_SRR1, r11
-       ld      r11, VCPU_INTR_MSR(r4)
+       mr      r9, r4
+       bl      kvmppc_msr_interrupt
  5:
  
  /*
@@@ -1098,17 -1218,15 +1218,15 @@@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201
        mftb    r6
        extsw   r5,r5
        add     r5,r5,r6
+       /* r5 is a guest timebase value here, convert to host TB */
+       ld      r3,HSTATE_KVM_VCORE(r13)
+       ld      r4,VCORE_TB_OFFSET(r3)
+       subf    r5,r4,r5
        std     r5,VCPU_DEC_EXPIRES(r9)
  
  BEGIN_FTR_SECTION
        b       8f
  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
-       /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
-       mfmsr   r8
-       li      r0, 1
-       rldimi  r8, r0, MSR_TM_LG, 63-MSR_TM_LG
-       mtmsrd  r8
        /* Save POWER8-specific registers */
        mfspr   r5, SPRN_IAMR
        mfspr   r6, SPRN_PSPB
        std     r5, VCPU_IC(r9)
        std     r6, VCPU_VTB(r9)
        std     r7, VCPU_TAR(r9)
- #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-       mfspr   r5, SPRN_TFHAR
-       mfspr   r6, SPRN_TFIAR
-       mfspr   r7, SPRN_TEXASR
-       std     r5, VCPU_TFHAR(r9)
-       std     r6, VCPU_TFIAR(r9)
-       std     r7, VCPU_TEXASR(r9)
- #endif
        mfspr   r8, SPRN_EBBHR
        std     r8, VCPU_EBBHR(r9)
        mfspr   r5, SPRN_EBBRR
@@@ -1387,7 -1497,7 +1497,7 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S
        ld      r8,VCORE_TB_OFFSET(r5)
        cmpdi   r8,0
        beq     17f
-       mftb    r6                      /* current host timebase */
+       mftb    r6                      /* current guest timebase */
        subf    r8,r8,r6
        mtspr   SPRN_TBU40,r8           /* update upper 40 bits */
        mftb    r7                      /* check if lower 24 bits overflowed */
@@@ -1557,7 -1667,7 +1667,7 @@@ kvmppc_hdsi
        mtspr   SPRN_SRR0, r10
        mtspr   SPRN_SRR1, r11
        li      r10, BOOK3S_INTERRUPT_DATA_STORAGE
-       ld      r11, VCPU_INTR_MSR(r9)
+       bl      kvmppc_msr_interrupt
  fast_interrupt_c_return:
  6:    ld      r7, VCPU_CTR(r9)
        lwz     r8, VCPU_XER(r9)
@@@ -1626,7 -1736,7 +1736,7 @@@ kvmppc_hisi
  1:    mtspr   SPRN_SRR0, r10
        mtspr   SPRN_SRR1, r11
        li      r10, BOOK3S_INTERRUPT_INST_STORAGE
-       ld      r11, VCPU_INTR_MSR(r9)
+       bl      kvmppc_msr_interrupt
        b       fast_interrupt_c_return
  
  3:    ld      r6, VCPU_KVM(r9)        /* not relocated, use VRMA */
@@@ -1669,7 -1779,7 +1779,7 @@@ sc_1_fast_return
        mtspr   SPRN_SRR0,r10
        mtspr   SPRN_SRR1,r11
        li      r10, BOOK3S_INTERRUPT_SYSCALL
-       ld      r11, VCPU_INTR_MSR(r9)
+       bl      kvmppc_msr_interrupt
        mr      r4,r9
        b       fast_guest_return
  
@@@ -1691,7 -1801,7 +1801,7 @@@ hcall_real_table
        .long   0               /* 0x10 - H_CLEAR_MOD */
        .long   0               /* 0x14 - H_CLEAR_REF */
        .long   .kvmppc_h_protect - hcall_real_table
-       .long   0               /* 0x1c - H_GET_TCE */
+       .long   .kvmppc_h_get_tce - hcall_real_table
        .long   .kvmppc_h_put_tce - hcall_real_table
        .long   0               /* 0x24 - H_SET_SPRG0 */
        .long   .kvmppc_h_set_dabr - hcall_real_table
@@@ -1997,7 -2107,7 +2107,7 @@@ machine_check_realmode
        beq     mc_cont
        /* If not, deliver a machine check.  SRR0/1 are already set */
        li      r10, BOOK3S_INTERRUPT_MACHINE_CHECK
-       ld      r11, VCPU_INTR_MSR(r9)
+       bl      kvmppc_msr_interrupt
        b       fast_interrupt_c_return
  
  /*
@@@ -2138,8 -2248,6 +2248,6 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC
        mfspr   r6,SPRN_VRSAVE
        stw     r6,VCPU_VRSAVE(r31)
        mtlr    r30
-       mtmsrd  r5
-       isync
        blr
  
  /*
@@@ -2186,3 -2294,20 +2294,20 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC
   */
  kvmppc_bad_host_intr:
        b       .
+ /*
+  * This mimics the MSR transition on IRQ delivery.  The new guest MSR is taken
+  * from VCPU_INTR_MSR and is modified based on the required TM state changes.
+  *   r11 has the guest MSR value (in/out)
+  *   r9 has a vcpu pointer (in)
+  *   r0 is used as a scratch register
+  */
+ kvmppc_msr_interrupt:
+       rldicl  r0, r11, 64 - MSR_TS_S_LG, 62
+       cmpwi   r0, 2 /* Check if we are in transactional state..  */
+       ld      r11, VCPU_INTR_MSR(r9)
+       bne     1f
+       /* ... if transactional, change to suspended */
+       li      r0, 1
+ 1:    rldimi  r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+       blr
diff --combined arch/s390/include/asm/kvm_host.h
index 9bf95bb30f1a6cf27d0494396168f87e23cf8a65,68897fc6595081af3acd0cb9007fef661c676563..154b60089be996de483f07844f9229c728918892
  #include <linux/hrtimer.h>
  #include <linux/interrupt.h>
  #include <linux/kvm_host.h>
+ #include <linux/kvm.h>
  #include <asm/debug.h>
  #include <asm/cpu.h>
+ #include <asm/isc.h>
  
  #define KVM_MAX_VCPUS 64
  #define KVM_USER_MEM_SLOTS 32
  
+ /*
+  * These seem to be used for allocating ->chip in the routing table,
+  * which we don't use. 4096 is an out-of-thin-air value. If we need
+  * to look at ->chip later on, we'll need to revisit this.
+  */
+ #define KVM_NR_IRQCHIPS 1
+ #define KVM_IRQCHIP_NUM_PINS 4096
  struct sca_entry {
        atomic_t scn;
        __u32   reserved;
@@@ -106,9 -116,9 +116,11 @@@ struct kvm_s390_sie_block 
        __u64   gbea;                   /* 0x0180 */
        __u8    reserved188[24];        /* 0x0188 */
        __u32   fac;                    /* 0x01a0 */
 -      __u8    reserved1a4[58];        /* 0x01a4 */
 +      __u8    reserved1a4[20];        /* 0x01a4 */
 +      __u64   cbrlo;                  /* 0x01b8 */
-       __u8    reserved1c0[40];        /* 0x01c0 */
++      __u8    reserved1c0[30];        /* 0x01c0 */
+       __u64   pp;                     /* 0x01de */
+       __u8    reserved1e6[2];         /* 0x01e6 */
        __u64   itdba;                  /* 0x01e8 */
        __u8    reserved1f0[16];        /* 0x01f0 */
  } __attribute__((packed));
@@@ -157,7 -167,6 +169,7 @@@ struct kvm_vcpu_stat 
        u32 instruction_stsi;
        u32 instruction_stfl;
        u32 instruction_tprot;
 +      u32 instruction_essa;
        u32 instruction_sigp_sense;
        u32 instruction_sigp_sense_running;
        u32 instruction_sigp_external_call;
        u32 diagnose_9c;
  };
  
- struct kvm_s390_io_info {
-       __u16        subchannel_id;            /* 0x0b8 */
-       __u16        subchannel_nr;            /* 0x0ba */
-       __u32        io_int_parm;              /* 0x0bc */
-       __u32        io_int_word;              /* 0x0c0 */
- };
- struct kvm_s390_ext_info {
-       __u32 ext_params;
-       __u64 ext_params2;
- };
  #define PGM_OPERATION            0x01
  #define PGM_PRIVILEGED_OP      0x02
  #define PGM_EXECUTE              0x03
  #define PGM_SPECIFICATION        0x06
  #define PGM_DATA                 0x07
  
- struct kvm_s390_pgm_info {
-       __u16 code;
- };
- struct kvm_s390_prefix_info {
-       __u32 address;
- };
- struct kvm_s390_extcall_info {
-       __u16 code;
- };
- struct kvm_s390_emerg_info {
-       __u16 code;
- };
- struct kvm_s390_mchk_info {
-       __u64 cr14;
-       __u64 mcic;
- };
  struct kvm_s390_interrupt_info {
        struct list_head list;
        u64     type;
@@@ -246,9 -222,8 +225,8 @@@ struct kvm_s390_float_interrupt 
        struct list_head list;
        atomic_t active;
        int next_rr_cpu;
-       unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
-                               / sizeof(long)];
-       struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
+       unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+       unsigned int irq_count;
  };
  
  
@@@ -265,6 -240,10 +243,10 @@@ struct kvm_vcpu_arch 
                u64             stidp_data;
        };
        struct gmap *gmap;
+ #define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
+       unsigned long pfault_token;
+       unsigned long pfault_select;
+       unsigned long pfault_compare;
  };
  
  struct kvm_vm_stat {
  struct kvm_arch_memory_slot {
  };
  
+ struct s390_map_info {
+       struct list_head list;
+       __u64 guest_addr;
+       __u64 addr;
+       struct page *page;
+ };
+ struct s390_io_adapter {
+       unsigned int id;
+       int isc;
+       bool maskable;
+       bool masked;
+       bool swap;
+       struct rw_semaphore maps_lock;
+       struct list_head maps;
+       atomic_t nr_maps;
+ };
+ #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+ #define MAX_S390_ADAPTER_MAPS 256
  struct kvm_arch{
        struct sca_block *sca;
        debug_info_t *dbf;
        struct kvm_s390_float_interrupt float_int;
+       struct kvm_device *flic;
        struct gmap *gmap;
        int css_support;
+       int use_irqchip;
+       struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
  };
  
  #define KVM_HVA_ERR_BAD               (-1UL)
@@@ -290,6 -293,24 +296,24 @@@ static inline bool kvm_is_error_hva(uns
        return IS_ERR_VALUE(addr);
  }
  
+ #define ASYNC_PF_PER_VCPU     64
+ struct kvm_vcpu;
+ struct kvm_async_pf;
+ struct kvm_arch_async_pf {
+       unsigned long pfault_token;
+ };
+ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+                              struct kvm_async_pf *work);
+ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+                                    struct kvm_async_pf *work);
+ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+                                struct kvm_async_pf *work);
  extern int sie64a(struct kvm_s390_sie_block *, u64 *);
  extern char sie_exit;
  #endif
diff --combined arch/s390/include/asm/pgtable.h
index 1ab75eaacbd417079d3e6de25e8401c87e24a1d7,66101f6c6d819354d2e04ef33a275cec253c2edd..50a75d96f9394faeb60a4ca5d8ca0f1411d754c0
@@@ -229,7 -229,6 +229,7 @@@ extern unsigned long MODULES_END
  #define _PAGE_READ    0x010           /* SW pte read bit */
  #define _PAGE_WRITE   0x020           /* SW pte write bit */
  #define _PAGE_SPECIAL 0x040           /* SW associated with special page */
 +#define _PAGE_UNUSED  0x080           /* SW bit for pgste usage state */
  #define __HAVE_ARCH_PTE_SPECIAL
  
  /* Set of bits not changed in pte_modify */
  
  #endif /* CONFIG_64BIT */
  
 +/* Guest Page State used for virtualization */
 +#define _PGSTE_GPS_ZERO               0x0000000080000000UL
 +#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
 +#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
 +#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
 +
  /*
   * A user page table pointer has the space-switch-event bit, the
   * private-space-control bit and the storage-alteration-event-control
@@@ -624,14 -617,6 +624,14 @@@ static inline int pte_none(pte_t pte
        return pte_val(pte) == _PAGE_INVALID;
  }
  
 +static inline int pte_swap(pte_t pte)
 +{
 +      /* Bit pattern: (pte & 0x603) == 0x402 */
 +      return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT |
 +                              _PAGE_TYPE | _PAGE_PRESENT))
 +              == (_PAGE_INVALID | _PAGE_TYPE);
 +}
 +
  static inline int pte_file(pte_t pte)
  {
        /* Bit pattern: (pte & 0x601) == 0x600 */
@@@ -782,6 -767,7 +782,7 @@@ static inline void pgste_set_pte(pte_t 
   * @table: pointer to the page directory
   * @asce: address space control element for gmap page table
   * @crst_list: list of all crst tables used in the guest address space
+  * @pfault_enabled: defines if pfaults are applicable for the guest
   */
  struct gmap {
        struct list_head list;
        unsigned long asce;
        void *private;
        struct list_head crst_list;
+       bool pfault_enabled;
  };
  
  /**
@@@ -836,20 -823,20 +838,20 @@@ unsigned long gmap_translate(unsigned l
  unsigned long __gmap_fault(unsigned long address, struct gmap *);
  unsigned long gmap_fault(unsigned long address, struct gmap *);
  void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
 +void __gmap_zap(unsigned long address, struct gmap *);
  
  void gmap_register_ipte_notifier(struct gmap_notifier *);
  void gmap_unregister_ipte_notifier(struct gmap_notifier *);
  int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
 -void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
 +void gmap_do_ipte_notify(struct mm_struct *, pte_t *);
  
  static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
 -                                      unsigned long addr,
                                        pte_t *ptep, pgste_t pgste)
  {
  #ifdef CONFIG_PGSTE
        if (pgste_val(pgste) & PGSTE_IN_BIT) {
                pgste_val(pgste) &= ~PGSTE_IN_BIT;
 -              gmap_do_ipte_notify(mm, addr, ptep);
 +              gmap_do_ipte_notify(mm, ptep);
        }
  #endif
        return pgste;
@@@ -867,7 -854,6 +869,7 @@@ static inline void set_pte_at(struct mm
  
        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
 +              pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
                pgste_set_key(ptep, pgste, entry);
                pgste_set_pte(ptep, entry);
                pgste_set_unlock(ptep, pgste);
@@@ -897,12 -883,6 +899,12 @@@ static inline int pte_young(pte_t pte
        return (pte_val(pte) & _PAGE_YOUNG) != 0;
  }
  
 +#define __HAVE_ARCH_PTE_UNUSED
 +static inline int pte_unused(pte_t pte)
 +{
 +      return pte_val(pte) & _PAGE_UNUSED;
 +}
 +
  /*
   * pgd/pmd/pte modification functions
   */
@@@ -1056,41 -1036,30 +1058,41 @@@ static inline int ptep_test_and_clear_u
  
  static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
  {
 -      if (!(pte_val(*ptep) & _PAGE_INVALID)) {
 +      unsigned long pto = (unsigned long) ptep;
 +
  #ifndef CONFIG_64BIT
 -              /* pto must point to the start of the segment table */
 -              pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
 -#else
 -              /* ipte in zarch mode can do the math */
 -              pte_t *pto = ptep;
 +      /* pto in ESA mode must point to the start of the segment table */
 +      pto &= 0x7ffffc00;
  #endif
 -              asm volatile(
 -                      "       ipte    %2,%3"
 -                      : "=m" (*ptep) : "m" (*ptep),
 -                        "a" (pto), "a" (address));
 -      }
 +      /* Invalidation + global TLB flush for the pte */
 +      asm volatile(
 +              "       ipte    %2,%3"
 +              : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
 +}
 +
 +static inline void ptep_flush_direct(struct mm_struct *mm,
 +                                   unsigned long address, pte_t *ptep)
 +{
 +      if (pte_val(*ptep) & _PAGE_INVALID)
 +              return;
 +      __ptep_ipte(address, ptep);
  }
  
  static inline void ptep_flush_lazy(struct mm_struct *mm,
                                   unsigned long address, pte_t *ptep)
  {
 -      int active = (mm == current->active_mm) ? 1 : 0;
 +      int active, count;
  
 -      if (atomic_read(&mm->context.attach_count) > active)
 -              __ptep_ipte(address, ptep);
 -      else
 +      if (pte_val(*ptep) & _PAGE_INVALID)
 +              return;
 +      active = (mm == current->active_mm) ? 1 : 0;
 +      count = atomic_add_return(0x10000, &mm->context.attach_count);
 +      if ((count & 0xffff) <= active) {
 +              pte_val(*ptep) |= _PAGE_INVALID;
                mm->context.flush_mm = 1;
 +      } else
 +              __ptep_ipte(address, ptep);
 +      atomic_sub(0x10000, &mm->context.attach_count);
  }
  
  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
@@@ -1103,11 -1072,11 +1105,11 @@@ static inline int ptep_test_and_clear_y
  
        if (mm_has_pgste(vma->vm_mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
 +              pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
        }
  
        pte = *ptep;
 -      __ptep_ipte(addr, ptep);
 +      ptep_flush_direct(vma->vm_mm, addr, ptep);
        young = pte_young(pte);
        pte = pte_mkold(pte);
  
@@@ -1149,7 -1118,7 +1151,7 @@@ static inline pte_t ptep_get_and_clear(
  
        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 +              pgste = pgste_ipte_notify(mm, ptep, pgste);
        }
  
        pte = *ptep;
@@@ -1173,11 -1142,12 +1175,11 @@@ static inline pte_t ptep_modify_prot_st
  
        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste_ipte_notify(mm, address, ptep, pgste);
 +              pgste_ipte_notify(mm, ptep, pgste);
        }
  
        pte = *ptep;
        ptep_flush_lazy(mm, address, ptep);
 -      pte_val(*ptep) |= _PAGE_INVALID;
  
        if (mm_has_pgste(mm)) {
                pgste = pgste_update_all(&pte, pgste);
@@@ -1210,17 -1180,14 +1212,17 @@@ static inline pte_t ptep_clear_flush(st
  
        if (mm_has_pgste(vma->vm_mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
 +              pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
        }
  
        pte = *ptep;
 -      __ptep_ipte(address, ptep);
 +      ptep_flush_direct(vma->vm_mm, address, ptep);
        pte_val(*ptep) = _PAGE_INVALID;
  
        if (mm_has_pgste(vma->vm_mm)) {
 +              if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
 +                  _PGSTE_GPS_USAGE_UNUSED)
 +                      pte_val(pte) |= _PAGE_UNUSED;
                pgste = pgste_update_all(&pte, pgste);
                pgste_set_unlock(ptep, pgste);
        }
@@@ -1244,7 -1211,7 +1246,7 @@@ static inline pte_t ptep_get_and_clear_
  
        if (!full && mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 +              pgste = pgste_ipte_notify(mm, ptep, pgste);
        }
  
        pte = *ptep;
@@@ -1269,7 -1236,7 +1271,7 @@@ static inline pte_t ptep_set_wrprotect(
        if (pte_write(pte)) {
                if (mm_has_pgste(mm)) {
                        pgste = pgste_get_lock(ptep);
 -                      pgste = pgste_ipte_notify(mm, address, ptep, pgste);
 +                      pgste = pgste_ipte_notify(mm, ptep, pgste);
                }
  
                ptep_flush_lazy(mm, address, ptep);
@@@ -1295,10 -1262,10 +1297,10 @@@ static inline int ptep_set_access_flags
                return 0;
        if (mm_has_pgste(vma->vm_mm)) {
                pgste = pgste_get_lock(ptep);
 -              pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
 +              pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
        }
  
 -      __ptep_ipte(address, ptep);
 +      ptep_flush_direct(vma->vm_mm, address, ptep);
  
        if (mm_has_pgste(vma->vm_mm)) {
                pgste_set_pte(ptep, entry);
@@@ -1482,16 -1449,12 +1484,16 @@@ static inline pmd_t pmd_mkwrite(pmd_t p
  static inline void pmdp_flush_lazy(struct mm_struct *mm,
                                   unsigned long address, pmd_t *pmdp)
  {
 -      int active = (mm == current->active_mm) ? 1 : 0;
 +      int active, count;
  
 -      if ((atomic_read(&mm->context.attach_count) & 0xffff) > active)
 -              __pmd_idte(address, pmdp);
 -      else
 +      active = (mm == current->active_mm) ? 1 : 0;
 +      count = atomic_add_return(0x10000, &mm->context.attach_count);
 +      if ((count & 0xffff) <= active) {
 +              pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
                mm->context.flush_mm = 1;
 +      } else
 +              __pmd_idte(address, pmdp);
 +      atomic_sub(0x10000, &mm->context.attach_count);
  }
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --combined arch/s390/kernel/irq.c
index a770be97db4da7c513e7952d0360c85a5d5154bb,c288ef7e47b427211e129d7d98dfeebba518a57b..d42b14cc72a4516efa4c976a8e84ad47b97cbaa2
@@@ -18,7 -18,6 +18,7 @@@
  #include <linux/errno.h>
  #include <linux/slab.h>
  #include <linux/cpu.h>
 +#include <linux/irq.h>
  #include <asm/irq_regs.h>
  #include <asm/cputime.h>
  #include <asm/lowcore.h>
@@@ -85,6 -84,7 +85,7 @@@ static const struct irq_class irqclass_
        [IRQIO_PCI]  = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
        [IRQIO_MSI]  = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
        [IRQIO_VIR]  = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+       [IRQIO_VAI]  = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
        [NMI_NMI]    = {.name = "NMI", .desc = "[NMI] Machine Check"},
        [CPU_RST]    = {.name = "RST", .desc = "[CPU] CPU Restart"},
  };
diff --combined arch/s390/kvm/diag.c
index 6f9cfa50037246d8d37bb5dd81a21d643ff3f544,bf9ed34c2bcd84af9a196f9a372c60de1c42a7d8..03a05ffb662f98d426302cffb4b08cb5cade7fac
  
  #include <linux/kvm.h>
  #include <linux/kvm_host.h>
 +#include <asm/pgalloc.h>
  #include <asm/virtio-ccw.h>
  #include "kvm-s390.h"
  #include "trace.h"
  #include "trace-s390.h"
+ #include "gaccess.h"
  
  static int diag_release_pages(struct kvm_vcpu *vcpu)
  {
        return 0;
  }
  
+ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+ {
+       struct prs_parm {
+               u16 code;
+               u16 subcode;
+               u16 parm_len;
+               u16 parm_version;
+               u64 token_addr;
+               u64 select_mask;
+               u64 compare_mask;
+               u64 zarch;
+       };
+       struct prs_parm parm;
+       int rc;
+       u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
+       u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+       unsigned long hva_token = KVM_HVA_ERR_BAD;
+       if (vcpu->run->s.regs.gprs[rx] & 7)
+               return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+       if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
+               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+               return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+       switch (parm.subcode) {
+       case 0: /* TOKEN */
+               if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+                       /*
+                        * If the pagefault handshake is already activated,
+                        * the token must not be changed.  We have to return
+                        * decimal 8 instead, as mandated in SC24-6084.
+                        */
+                       vcpu->run->s.regs.gprs[ry] = 8;
+                       return 0;
+               }
+               if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+                   parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+                       return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+               hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
+               if (kvm_is_error_hva(hva_token))
+                       return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+               vcpu->arch.pfault_token = parm.token_addr;
+               vcpu->arch.pfault_select = parm.select_mask;
+               vcpu->arch.pfault_compare = parm.compare_mask;
+               vcpu->run->s.regs.gprs[ry] = 0;
+               rc = 0;
+               break;
+       case 1: /*
+                * CANCEL
+                * Specification allows to let already pending tokens survive
+                * the cancel, therefore to reduce code complexity, we assume
+                * all outstanding tokens are already pending.
+                */
+               if (parm.token_addr || parm.select_mask ||
+                   parm.compare_mask || parm.zarch)
+                       return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+               vcpu->run->s.regs.gprs[ry] = 0;
+               /*
+                * If the pfault handling was not established or is already
+                * canceled SC24-6084 requests to return decimal 4.
+                */
+               if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+                       vcpu->run->s.regs.gprs[ry] = 4;
+               else
+                       vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+               rc = 0;
+               break;
+       default:
+               rc = -EOPNOTSUPP;
+               break;
+       }
+       return rc;
+ }
  static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
  {
        VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
@@@ -87,11 -168,9 +169,11 @@@ static int __diag_ipl_functions(struct 
        switch (subcode) {
        case 3:
                vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
 +              page_table_reset_pgste(current->mm, 0, TASK_SIZE);
                break;
        case 4:
                vcpu->run->s390_reset_flags = 0;
 +              page_table_reset_pgste(current->mm, 0, TASK_SIZE);
                break;
        default:
                return -EOPNOTSUPP;
@@@ -153,6 -232,8 +235,8 @@@ int kvm_s390_handle_diag(struct kvm_vcp
                return __diag_time_slice_end(vcpu);
        case 0x9c:
                return __diag_time_slice_end_directed(vcpu);
+       case 0x258:
+               return __diag_page_ref_service(vcpu);
        case 0x308:
                return __diag_ipl_functions(vcpu);
        case 0x500:
diff --combined arch/s390/kvm/kvm-s390.c
index 10b5db3c9bc4a71d179ed02b994d7fdea6109311,6e1b990e427fcc79e2589dbf25566671c51af815..b3ecb8f5b6ce2bcefb4fe92a64b99d2012cdd770
@@@ -68,7 -68,6 +68,7 @@@ struct kvm_stats_debugfs_item debugfs_e
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
 +      { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
@@@ -153,11 -152,14 +153,14 @@@ int kvm_dev_ioctl_check_extension(long 
  #ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
  #endif
+       case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
+       case KVM_CAP_DEVICE_CTRL:
+       case KVM_CAP_ENABLE_CAP_VM:
                r = 1;
                break;
        case KVM_CAP_NR_VCPUS:
@@@ -186,6 -188,25 +189,25 @@@ int kvm_vm_ioctl_get_dirty_log(struct k
        return 0;
  }
  
+ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+ {
+       int r;
+       if (cap->flags)
+               return -EINVAL;
+       switch (cap->cap) {
+       case KVM_CAP_S390_IRQCHIP:
+               kvm->arch.use_irqchip = 1;
+               r = 0;
+               break;
+       default:
+               r = -EINVAL;
+               break;
+       }
+       return r;
+ }
  long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
  {
                r = kvm_s390_inject_vm(kvm, &s390int);
                break;
        }
+       case KVM_ENABLE_CAP: {
+               struct kvm_enable_cap cap;
+               r = -EFAULT;
+               if (copy_from_user(&cap, argp, sizeof(cap)))
+                       break;
+               r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+               break;
+       }
+       case KVM_CREATE_IRQCHIP: {
+               struct kvm_irq_routing_entry routing;
+               r = -EINVAL;
+               if (kvm->arch.use_irqchip) {
+                       /* Set up dummy routing. */
+                       memset(&routing, 0, sizeof(routing));
+                       kvm_set_irq_routing(kvm, &routing, 0, 0);
+                       r = 0;
+               }
+               break;
+       }
        default:
                r = -ENOTTY;
        }
@@@ -214,6 -255,7 +256,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
  {
        int rc;
        char debug_name[16];
+       static unsigned long sca_offset;
  
        rc = -EINVAL;
  #ifdef CONFIG_KVM_S390_UCONTROL
        kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
        if (!kvm->arch.sca)
                goto out_err;
+       spin_lock(&kvm_lock);
+       sca_offset = (sca_offset + 16) & 0x7f0;
+       kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
+       spin_unlock(&kvm_lock);
  
        sprintf(debug_name, "kvm-%u", current->pid);
  
                if (!kvm->arch.gmap)
                        goto out_nogmap;
                kvm->arch.gmap->private = kvm;
+               kvm->arch.gmap->pfault_enabled = 0;
        }
  
        kvm->arch.css_support = 0;
+       kvm->arch.use_irqchip = 0;
  
        return 0;
  out_nogmap:
@@@ -272,6 -320,7 +321,7 @@@ void kvm_arch_vcpu_destroy(struct kvm_v
  {
        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+       kvm_clear_async_pf_completion_queue(vcpu);
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                clear_bit(63 - vcpu->vcpu_id,
                          (unsigned long *) &vcpu->kvm->arch.sca->mcn);
        if (kvm_is_ucontrol(vcpu->kvm))
                gmap_free(vcpu->arch.gmap);
  
 +      if (vcpu->arch.sie_block->cbrlo)
 +              __free_page(__pfn_to_page(
 +                              vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
        free_page((unsigned long)(vcpu->arch.sie_block));
 +
        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, vcpu);
  }
@@@ -320,11 -365,14 +370,14 @@@ void kvm_arch_destroy_vm(struct kvm *kv
        debug_unregister(kvm->arch.dbf);
        if (!kvm_is_ucontrol(kvm))
                gmap_free(kvm->arch.gmap);
+       kvm_s390_destroy_adapters(kvm);
  }
  
  /* Section: vcpu related */
  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
  {
+       vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+       kvm_clear_async_pf_completion_queue(vcpu);
        if (kvm_is_ucontrol(vcpu->kvm)) {
                vcpu->arch.gmap = gmap_alloc(current->mm);
                if (!vcpu->arch.gmap)
@@@ -385,7 -433,11 +438,11 @@@ static void kvm_s390_vcpu_initial_reset
        vcpu->arch.guest_fpregs.fpc = 0;
        asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
        vcpu->arch.sie_block->gbea = 1;
+       vcpu->arch.sie_block->pp = 0;
+       vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+       kvm_clear_async_pf_completion_queue(vcpu);
        atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+       kvm_s390_clear_local_irqs(vcpu);
  }
  
  int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
  
  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
  {
 +      struct page *cbrl;
 +
        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
                                                    CPUSTAT_STOPPED |
        vcpu->arch.sie_block->ecb2  = 8;
        vcpu->arch.sie_block->eca   = 0xC1002001U;
        vcpu->arch.sie_block->fac   = (int) (long) vfacilities;
 +      if (kvm_enabled_cmma()) {
 +              cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
 +              if (cbrl) {
 +                      vcpu->arch.sie_block->ecb2 |= 0x80;
 +                      vcpu->arch.sie_block->ecb2 &= ~0x08;
 +                      vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
 +              }
 +      }
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
        tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
                     (unsigned long) vcpu);
@@@ -466,11 -508,8 +523,8 @@@ struct kvm_vcpu *kvm_arch_vcpu_create(s
        spin_lock_init(&vcpu->arch.local_int.lock);
        INIT_LIST_HEAD(&vcpu->arch.local_int.list);
        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
-       spin_lock(&kvm->arch.float_int.lock);
-       kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
        vcpu->arch.local_int.wq = &vcpu->wq;
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
-       spin_unlock(&kvm->arch.float_int.lock);
  
        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
@@@ -490,9 -529,7 +544,7 @@@ out
  
  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
  {
-       /* kvm common code refers to this, but never calls it */
-       BUG();
-       return 0;
+       return kvm_cpu_has_interrupt(vcpu);
  }
  
  void s390_vcpu_block(struct kvm_vcpu *vcpu)
@@@ -568,6 -605,26 +620,26 @@@ static int kvm_arch_vcpu_ioctl_get_one_
                r = put_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
+       case KVM_REG_S390_PFTOKEN:
+               r = put_user(vcpu->arch.pfault_token,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PFCOMPARE:
+               r = put_user(vcpu->arch.pfault_compare,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PFSELECT:
+               r = put_user(vcpu->arch.pfault_select,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PP:
+               r = put_user(vcpu->arch.sie_block->pp,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_GBEA:
+               r = put_user(vcpu->arch.sie_block->gbea,
+                            (u64 __user *)reg->addr);
+               break;
        default:
                break;
        }
@@@ -597,6 -654,26 +669,26 @@@ static int kvm_arch_vcpu_ioctl_set_one_
                r = get_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
+       case KVM_REG_S390_PFTOKEN:
+               r = get_user(vcpu->arch.pfault_token,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PFCOMPARE:
+               r = get_user(vcpu->arch.pfault_compare,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PFSELECT:
+               r = get_user(vcpu->arch.pfault_select,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_PP:
+               r = get_user(vcpu->arch.sie_block->pp,
+                            (u64 __user *)reg->addr);
+               break;
+       case KVM_REG_S390_GBEA:
+               r = get_user(vcpu->arch.sie_block->gbea,
+                            (u64 __user *)reg->addr);
+               break;
        default:
                break;
        }
@@@ -715,10 -792,100 +807,100 @@@ static int kvm_s390_handle_requests(str
        return 0;
  }
  
+ static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
+ {
+       long rc;
+       hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+       struct mm_struct *mm = current->mm;
+       down_read(&mm->mmap_sem);
+       rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
+       up_read(&mm->mmap_sem);
+       return rc;
+ }
+ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+                                     unsigned long token)
+ {
+       struct kvm_s390_interrupt inti;
+       inti.parm64 = token;
+       if (start_token) {
+               inti.type = KVM_S390_INT_PFAULT_INIT;
+               WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+       } else {
+               inti.type = KVM_S390_INT_PFAULT_DONE;
+               WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+       }
+ }
+ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+                                    struct kvm_async_pf *work)
+ {
+       trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+       __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+ }
+ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+                                struct kvm_async_pf *work)
+ {
+       trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+       __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+ }
+ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+                              struct kvm_async_pf *work)
+ {
+       /* s390 will always inject the page directly */
+ }
+ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+ {
+       /*
+        * s390 will always inject the page directly,
+        * but we still want check_async_completion to cleanup
+        */
+       return true;
+ }
+ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+ {
+       hva_t hva;
+       struct kvm_arch_async_pf arch;
+       int rc;
+       if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+               return 0;
+       if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+           vcpu->arch.pfault_compare)
+               return 0;
+       if (psw_extint_disabled(vcpu))
+               return 0;
+       if (kvm_cpu_has_interrupt(vcpu))
+               return 0;
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+               return 0;
+       if (!vcpu->arch.gmap->pfault_enabled)
+               return 0;
+       hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+       if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
+               return 0;
+       rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+       return rc;
+ }
  static int vcpu_pre_run(struct kvm_vcpu *vcpu)
  {
        int rc, cpuflags;
  
+       /*
+        * On s390 notifications for arriving pages will be delivered directly
+        * to the guest but the house keeping for completed pfaults is
+        * handled outside the worker.
+        */
+       kvm_check_async_pf_completion(vcpu);
        memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
  
        if (need_resched())
  
  static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
  {
-       int rc;
+       int rc = -1;
  
        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
                                                current->thread.gmap_addr;
                vcpu->run->s390_ucontrol.pgm_code = 0x10;
                rc = -EREMOTE;
-       } else {
+       } else if (current->thread.gmap_pfault) {
+               trace_kvm_s390_major_guest_pfault(vcpu);
+               current->thread.gmap_pfault = 0;
+               if (kvm_arch_setup_async_pf(vcpu) ||
+                   (kvm_arch_fault_in_sync(vcpu) >= 0))
+                       rc = 0;
+       }
+       if (rc == -1) {
                VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
                trace_kvm_s390_sie_fault(vcpu);
                rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
  
        if (rc == 0) {
                if (kvm_is_ucontrol(vcpu->kvm))
-                       rc = -EOPNOTSUPP;
+                       /* Don't exit for host interrupts. */
+                       rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
                else
                        rc = kvm_handle_sie_intercept(vcpu);
        }
        return rc;
  }
  
 +bool kvm_enabled_cmma(void)
 +{
 +      if (!MACHINE_IS_LPAR)
 +              return false;
 +      /* only enable for z10 and later */
 +      if (!MACHINE_HAS_EDAT1)
 +              return false;
 +      return true;
 +}
 +
  static int __vcpu_run(struct kvm_vcpu *vcpu)
  {
        int rc, exit_reason;
@@@ -831,8 -998,6 +1023,6 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
  
        atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
  
-       BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
        switch (kvm_run->exit_reason) {
        case KVM_EXIT_S390_SIEIC:
        case KVM_EXIT_UNKNOWN:
diff --combined arch/s390/kvm/kvm-s390.h
index 564514f410f45682272bdc5a3e5064306a9e9960,660e79f8f8e8db52a3296df9dbbe5b950339519a..3c1e2274d9eae858fce363cd5f89ddb699e1fa05
@@@ -129,6 -129,7 +129,7 @@@ enum hrtimer_restart kvm_s390_idle_wake
  void kvm_s390_tasklet(unsigned long parm);
  void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
  void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
+ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
  int __must_check kvm_s390_inject_vm(struct kvm *kvm,
                                    struct kvm_s390_interrupt *s390int);
  int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
  int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
  struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                    u64 cr6, u64 schid);
+ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
  
  /* implemented in priv.c */
  int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@@ -156,9 -158,12 +158,14 @@@ void s390_vcpu_block(struct kvm_vcpu *v
  void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
  void exit_sie(struct kvm_vcpu *vcpu);
  void exit_sie_sync(struct kvm_vcpu *vcpu);
 +/* are we going to support cmma? */
 +bool kvm_enabled_cmma(void);
  /* implemented in diag.c */
  int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
  
+ /* implemented in interrupt.c */
+ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+ int psw_extint_disabled(struct kvm_vcpu *vcpu);
+ void kvm_s390_destroy_adapters(struct kvm *kvm);
  #endif
diff --combined arch/s390/kvm/priv.c
index aacb6b129914bc1c7d207d0587fc4fb2efe66ccf,ae9e8ee2155705051e42043cce43e18386acafcb..476e9e218f43ee5cfa2842951c845a0499c4834e
@@@ -396,15 -396,10 +396,10 @@@ static int handle_stidp(struct kvm_vcp
  
  static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
  {
-       struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
        int cpus = 0;
        int n;
  
-       spin_lock(&fi->lock);
-       for (n = 0; n < KVM_MAX_VCPUS; n++)
-               if (fi->local_int[n])
-                       cpus++;
-       spin_unlock(&fi->lock);
+       cpus = atomic_read(&vcpu->kvm->online_vcpus);
  
        /* deal with other level 3 hypervisors */
        if (stsi(mem, 3, 2, 2))
@@@ -636,49 -631,8 +631,49 @@@ static int handle_pfmf(struct kvm_vcpu 
        return 0;
  }
  
 +static int handle_essa(struct kvm_vcpu *vcpu)
 +{
 +      /* entries expected to be 0x1ff */
 +      int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
 +      unsigned long *cbrlo, cbrle;
 +      struct gmap *gmap;
 +      int i;
 +
 +      VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
 +      gmap = vcpu->arch.gmap;
 +      vcpu->stat.instruction_essa++;
 +      if (!kvm_enabled_cmma() || !vcpu->arch.sie_block->cbrlo)
 +              return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 +
 +      if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 +              return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 +
 +      if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
 +              return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 +
 +      /* Rewind PSW to repeat the ESSA instruction */
 +      vcpu->arch.sie_block->gpsw.addr =
 +              __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
 +      vcpu->arch.sie_block->cbrlo &= PAGE_MASK;       /* reset nceo */
 +      cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
 +      down_read(&gmap->mm->mmap_sem);
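 +      /* walk the guest's list of released pages and try to free their backing */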
 +      for (i = 0; i < entries; ++i) {
 +              cbrle = cbrlo[i];
 +              if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE))
 +                      /* invalid entry */
 +                      break;
 +              /* try to free backing */
 +              __gmap_zap(cbrle, gmap);
 +      }
 +      up_read(&gmap->mm->mmap_sem);
 +      if (i < entries)
 +              return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 +      return 0;
 +}
 +
  static const intercept_handler_t b9_handlers[256] = {
        [0x8d] = handle_epsw,
 +      [0xab] = handle_essa,
        [0xaf] = handle_pfmf,
  };
  
index 6c1d7411eb009a5a96ef55004fdedf60aa1ef822,dcd047b629ec044655ff5963f00aa60134e655de..d949ef28c48bd9c423c5c668a1b0ce61ed70cac5
@@@ -6,16 -6,15 +6,18 @@@
  
  #define XSTATE_CPUID          0x0000000d
  
 -#define XSTATE_FP     0x1
 -#define XSTATE_SSE    0x2
 -#define XSTATE_YMM    0x4
 -#define XSTATE_BNDREGS        0x8
 -#define XSTATE_BNDCSR 0x10
 +#define XSTATE_FP             0x1
 +#define XSTATE_SSE            0x2
 +#define XSTATE_YMM            0x4
 +#define XSTATE_BNDREGS                0x8
 +#define XSTATE_BNDCSR         0x10
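 +/* AVX-512 state components */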
 +#define XSTATE_OPMASK         0x20
 +#define XSTATE_ZMM_Hi256      0x40
 +#define XSTATE_Hi16_ZMM               0x80
  
  #define XSTATE_FPSSE  (XSTATE_FP | XSTATE_SSE)
+ /* Bit 63 of XCR0 is reserved for future expansion */
+ #define XSTATE_EXTEND_MASK    (~(XSTATE_FPSSE | (1ULL << 63)))
  
  #define FXSAVE_SIZE   512
  
@@@ -26,8 -25,7 +28,8 @@@
  #define XSAVE_YMM_OFFSET    (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
  
  /* Supported features which support lazy state saving */
 -#define XSTATE_LAZY   (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
 +#define XSTATE_LAZY   (XSTATE_FP | XSTATE_SSE | XSTATE_YMM                  \
 +                      | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
  
  /* Supported features which require eager state saving */
  #define XSTATE_EAGER  (XSTATE_BNDREGS | XSTATE_BNDCSR)
index 4924f4be2b992198995a3bf328f011de65dc2230,ed821ed45eb613c14abae840ea8b51114f8b87e0..c827ace3121bc0f7ff3dc9d4cc74024207adc68c
  #define MSR_SMI_COUNT                 0x00000034
  #define MSR_IA32_FEATURE_CONTROL        0x0000003a
  #define MSR_IA32_TSC_ADJUST             0x0000003b
+ #define MSR_IA32_BNDCFGS              0x00000d90
  
  #define FEATURE_CONTROL_LOCKED                                (1<<0)
  #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX      (1<<1)
  #define THERM_LOG_THRESHOLD1           (1 << 9)
  
  /* MISC_ENABLE bits: architectural */
 -#define MSR_IA32_MISC_ENABLE_FAST_STRING      (1ULL << 0)
 -#define MSR_IA32_MISC_ENABLE_TCC              (1ULL << 1)
 -#define MSR_IA32_MISC_ENABLE_EMON             (1ULL << 7)
 -#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL      (1ULL << 11)
 -#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL     (1ULL << 12)
 -#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP       (1ULL << 16)
 -#define MSR_IA32_MISC_ENABLE_MWAIT            (1ULL << 18)
 -#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID      (1ULL << 22)
 -#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE     (1ULL << 23)
 -#define MSR_IA32_MISC_ENABLE_XD_DISABLE               (1ULL << 34)
 +#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT          0
 +#define MSR_IA32_MISC_ENABLE_FAST_STRING              (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
 +#define MSR_IA32_MISC_ENABLE_TCC_BIT                  1
 +#define MSR_IA32_MISC_ENABLE_TCC                      (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
 +#define MSR_IA32_MISC_ENABLE_EMON_BIT                 7
 +#define MSR_IA32_MISC_ENABLE_EMON                     (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
 +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT          11
 +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL              (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
 +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT         12
 +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL             (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
 +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT   16
 +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP               (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
 +#define MSR_IA32_MISC_ENABLE_MWAIT_BIT                        18
 +#define MSR_IA32_MISC_ENABLE_MWAIT                    (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
 +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT          22
 +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID              (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
 +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT         23
 +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE             (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT           34
 +#define MSR_IA32_MISC_ENABLE_XD_DISABLE                       (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
  
  /* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
 -#define MSR_IA32_MISC_ENABLE_X87_COMPAT               (1ULL << 2)
 -#define MSR_IA32_MISC_ENABLE_TM1              (1ULL << 3)
 -#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE       (1ULL << 4)
 -#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE  (1ULL << 6)
 -#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK    (1ULL << 8)
 -#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9)
 -#define MSR_IA32_MISC_ENABLE_FERR             (1ULL << 10)
 -#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX   (1ULL << 10)
 -#define MSR_IA32_MISC_ENABLE_TM2              (1ULL << 13)
 -#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19)
 -#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK   (1ULL << 20)
 -#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT      (1ULL << 24)
 -#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37)
 -#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE    (1ULL << 38)
 -#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE  (1ULL << 39)
 +#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT           2
 +#define MSR_IA32_MISC_ENABLE_X87_COMPAT                       (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
 +#define MSR_IA32_MISC_ENABLE_TM1_BIT                  3
 +#define MSR_IA32_MISC_ENABLE_TM1                      (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
 +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT   4
 +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE               (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT      6
 +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE          (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT                8
 +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK            (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
 +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT     9
 +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE         (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_FERR_BIT                 10
 +#define MSR_IA32_MISC_ENABLE_FERR                     (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
 +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT               10
 +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX           (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
 +#define MSR_IA32_MISC_ENABLE_TM2_BIT                  13
 +#define MSR_IA32_MISC_ENABLE_TM2                      (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
 +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT     19
 +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE         (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT               20
 +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK           (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
 +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT          24
 +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT              (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
 +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT     37
 +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE         (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT                38
 +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE            (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
 +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT      39
 +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE          (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
  
  #define MSR_IA32_TSC_DEADLINE         0x000006E0
  
diff --combined arch/x86/kvm/cpuid.c
index e5503d8aec1dac41f6ff7f97eb77e43beee6f38b,64fae65730f3a536f8586c9d0de26395c0c283db..bea60671ef8a8c17227e4c4124c0f52a7c8c6f63
@@@ -28,7 -28,7 +28,7 @@@ static u32 xstate_required_size(u64 xst
        int feature_bit = 0;
        u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
  
-       xstate_bv &= ~XSTATE_FPSSE;
+       xstate_bv &= XSTATE_EXTEND_MASK;
        while (xstate_bv) {
                if (xstate_bv & 0x1) {
                        u32 eax, ebx, ecx, edx;
        return ret;
  }
  
+ u64 kvm_supported_xcr0(void)
+ {
+       u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
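+       /* only expose MPX state if the vendor module reports MPX support */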
+       if (!kvm_x86_ops->mpx_supported())
+               xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+       return xcr0;
+ }
  void kvm_update_cpuid(struct kvm_vcpu *vcpu)
  {
        struct kvm_cpuid_entry2 *best;
@@@ -73,9 -83,9 +83,9 @@@
        } else {
                vcpu->arch.guest_supported_xcr0 =
                        (best->eax | ((u64)best->edx << 32)) &
-                       host_xcr0 & KVM_SUPPORTED_XCR0;
-               vcpu->arch.guest_xstate_size =
-                       xstate_required_size(vcpu->arch.guest_supported_xcr0);
+                       kvm_supported_xcr0();
+               vcpu->arch.guest_xstate_size = best->ebx =
+                       xstate_required_size(vcpu->arch.xcr0);
        }
  
        kvm_pmu_cpuid_update(vcpu);
@@@ -210,13 -220,6 +220,6 @@@ static void do_cpuid_1_ent(struct kvm_c
        entry->flags = 0;
  }
  
- static bool supported_xcr0_bit(unsigned bit)
- {
-       u64 mask = ((u64)1 << bit);
-       return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
- }
  #define F(x) bit(X86_FEATURE_##x)
  
  static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
@@@ -256,6 -259,7 +259,7 @@@ static inline int __do_cpuid_ent(struc
  #endif
        unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
        unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
+       unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
  
        /* cpuid 1.edx */
        const u32 kvm_supported_word0_x86_features =
                F(TSC) | F(MSR) | F(PAE) | F(MCE) |
                F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
 -              F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
 +              F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
                0 /* Reserved, DS, ACPI */ | F(MMX) |
                F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
                0 /* HTT, TM, Reserved, PBE */;
        /* cpuid 7.0.ebx */
        const u32 kvm_supported_word9_x86_features =
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-               F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
+               F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
+               F(ADX);
  
        /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();
        }
        case 0xd: {
                int idx, i;
+               u64 supported = kvm_supported_xcr0();
  
-               entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
-               entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
+               entry->eax &= supported;
+               entry->edx &= supported >> 32;
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                for (idx = 1, i = 1; idx < 64; ++idx) {
+                       u64 mask = ((u64)1 << idx);
                        if (*nent >= maxnent)
                                goto out;
  
                        do_cpuid_1_ent(&entry[i], function, idx);
-                       if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+                       if (entry[i].eax == 0 || !(supported & mask))
                                continue;
                        entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
diff --combined arch/x86/kvm/svm.c
index 2de1bc09a8d40a0508e7e364bc1de301215cc7c5,2136cb6ab1327a2ef645eb6451324b2ba8280a91..7f4f9c2badaefdf880b999fed48274748a808fd7
@@@ -34,6 -34,7 +34,7 @@@
  #include <asm/perf_event.h>
  #include <asm/tlbflush.h>
  #include <asm/desc.h>
+ #include <asm/debugreg.h>
  #include <asm/kvm_para.h>
  
  #include <asm/virtext.h>
@@@ -303,20 -304,35 +304,35 @@@ static inline bool is_cr_intercept(stru
        return vmcb->control.intercept_cr & (1U << bit);
  }
  
- static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
+ static inline void set_dr_intercepts(struct vcpu_svm *svm)
  {
        struct vmcb *vmcb = get_host_vmcb(svm);
  
-       vmcb->control.intercept_dr |= (1U << bit);
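+       /* intercept all reads and writes of DR0-DR7 */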
+       vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
+               | (1 << INTERCEPT_DR1_READ)
+               | (1 << INTERCEPT_DR2_READ)
+               | (1 << INTERCEPT_DR3_READ)
+               | (1 << INTERCEPT_DR4_READ)
+               | (1 << INTERCEPT_DR5_READ)
+               | (1 << INTERCEPT_DR6_READ)
+               | (1 << INTERCEPT_DR7_READ)
+               | (1 << INTERCEPT_DR0_WRITE)
+               | (1 << INTERCEPT_DR1_WRITE)
+               | (1 << INTERCEPT_DR2_WRITE)
+               | (1 << INTERCEPT_DR3_WRITE)
+               | (1 << INTERCEPT_DR4_WRITE)
+               | (1 << INTERCEPT_DR5_WRITE)
+               | (1 << INTERCEPT_DR6_WRITE)
+               | (1 << INTERCEPT_DR7_WRITE);
  
        recalc_intercepts(svm);
  }
  
- static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
+ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
  {
        struct vmcb *vmcb = get_host_vmcb(svm);
  
-       vmcb->control.intercept_dr &= ~(1U << bit);
+       vmcb->control.intercept_dr = 0;
  
        recalc_intercepts(svm);
  }
@@@ -1080,23 -1096,7 +1096,7 @@@ static void init_vmcb(struct vcpu_svm *
        set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
        set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
  
-       set_dr_intercept(svm, INTERCEPT_DR0_READ);
-       set_dr_intercept(svm, INTERCEPT_DR1_READ);
-       set_dr_intercept(svm, INTERCEPT_DR2_READ);
-       set_dr_intercept(svm, INTERCEPT_DR3_READ);
-       set_dr_intercept(svm, INTERCEPT_DR4_READ);
-       set_dr_intercept(svm, INTERCEPT_DR5_READ);
-       set_dr_intercept(svm, INTERCEPT_DR6_READ);
-       set_dr_intercept(svm, INTERCEPT_DR7_READ);
-       set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
-       set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
+       set_dr_intercepts(svm);
  
        set_exception_intercept(svm, PF_VECTOR);
        set_exception_intercept(svm, UD_VECTOR);
@@@ -1684,6 -1684,21 +1684,21 @@@ static void svm_set_dr6(struct kvm_vcp
        mark_dirty(svm->vmcb, VMCB_DR);
  }
  
+ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
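+
+       /*
+        * The guest was running with debug register intercepts cleared;
+        * read back the values it left behind and re-enable the intercepts.
+        */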
+       get_debugreg(vcpu->arch.db[0], 0);
+       get_debugreg(vcpu->arch.db[1], 1);
+       get_debugreg(vcpu->arch.db[2], 2);
+       get_debugreg(vcpu->arch.db[3], 3);
+       vcpu->arch.dr6 = svm_get_dr6(vcpu);
+       vcpu->arch.dr7 = svm->vmcb->save.dr7;
+       vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
+       set_dr_intercepts(svm);
+ }
  static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
@@@ -2842,6 -2857,7 +2857,7 @@@ static int iret_interception(struct vcp
        clr_intercept(svm, INTERCEPT_IRET);
        svm->vcpu.arch.hflags |= HF_IRET_MASK;
        svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+       kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
        return 1;
  }
  
@@@ -2974,6 -2990,17 +2990,17 @@@ static int dr_interception(struct vcpu_
        unsigned long val;
        int err;
  
+       if (svm->vcpu.guest_debug == 0) {
+               /*
+                * No more DR vmexits; force a reload of the debug registers
+                * and reenter on this instruction.  The next vmexit will
+                * retrieve the full state of the debug registers.
+                */
+               clr_dr_intercepts(svm);
+               svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+               return 1;
+       }
        if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
                return emulate_on_interception(svm);
  
@@@ -3002,8 -3029,10 +3029,8 @@@ static int cr8_write_interception(struc
        u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
        /* instruction emulation calls kvm_set_cr8() */
        r = cr_interception(svm);
 -      if (irqchip_in_kernel(svm->vcpu.kvm)) {
 -              clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
 +      if (irqchip_in_kernel(svm->vcpu.kvm))
                return r;
 -      }
        if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
                return r;
        kvm_run->exit_reason = KVM_EXIT_SET_TPR;
@@@ -3565,8 -3594,6 +3592,8 @@@ static void update_cr8_intercept(struc
        if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
                return;
  
 +      clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
 +
        if (irr == -1)
                return;
  
@@@ -3649,7 -3676,7 +3676,7 @@@ static int svm_interrupt_allowed(struc
        return ret;
  }
  
- static int enable_irq_window(struct kvm_vcpu *vcpu)
+ static void enable_irq_window(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
  
                svm_set_vintr(svm);
                svm_inject_irq(svm, 0x0);
        }
-       return 0;
  }
  
- static int enable_nmi_window(struct kvm_vcpu *vcpu)
+ static void enable_nmi_window(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
  
        if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
            == HF_NMI_MASK)
-               return 0; /* IRET will cause a vm exit */
+               return; /* IRET will cause a vm exit */
  
        /*
         * Something prevents NMI from being injected. Single step over possible
        svm->nmi_singlestep = true;
        svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
        update_db_bp_intercept(vcpu);
-       return 0;
  }
  
  static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@@ -4064,6 -4089,11 +4089,11 @@@ static bool svm_invpcid_supported(void
        return false;
  }
  
+ static bool svm_mpx_supported(void)
+ {
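+       /* MPX is not available on AMD CPUs */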
+       return false;
+ }
  static bool svm_has_wbinvd_exit(void)
  {
        return true;
@@@ -4302,6 -4332,7 +4332,7 @@@ static struct kvm_x86_ops svm_x86_ops 
        .get_dr6 = svm_get_dr6,
        .set_dr6 = svm_set_dr6,
        .set_dr7 = svm_set_dr7,
+       .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
        .cache_reg = svm_cache_reg,
        .get_rflags = svm_get_rflags,
        .set_rflags = svm_set_rflags,
  
        .rdtscp_supported = svm_rdtscp_supported,
        .invpcid_supported = svm_invpcid_supported,
+       .mpx_supported = svm_mpx_supported,
  
        .set_supported_cpuid = svm_set_supported_cpuid,
  
diff --combined virt/kvm/kvm_main.c
index b5ec7fb986f6a560a258c99b8af3977aff936c2d,5fd4cf8e8888585033d44cf113d2931b4de08572..56baae8c2f56baf0f41bb7bb24b0b12679267d20
@@@ -102,7 -102,7 +102,7 @@@ static void kvm_release_pfn_dirty(pfn_
  static void mark_page_dirty_in_slot(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot, gfn_t gfn);
  
 -bool kvm_rebooting;
 +__visible bool kvm_rebooting;
  EXPORT_SYMBOL_GPL(kvm_rebooting);
  
  static bool largepages_enabled = true;
@@@ -186,12 -186,9 +186,9 @@@ static bool make_all_cpus_request(struc
  
  void kvm_flush_remote_tlbs(struct kvm *kvm)
  {
-       long dirty_count = kvm->tlbs_dirty;
-       smp_mb();
        if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.remote_tlb_flush;
-       cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
+       kvm->tlbs_dirty = false;
  }
  EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
  
@@@ -1804,7 -1801,7 +1801,7 @@@ void kvm_vcpu_on_spin(struct kvm_vcpu *
                                continue;
                        if (vcpu == me)
                                continue;
-                       if (waitqueue_active(&vcpu->wq))
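+                       /* only skip a waiting vcpu if it has nothing pending to run */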
+                       if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
                                continue;
                        if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
                                continue;
@@@ -2283,6 -2280,11 +2280,11 @@@ static int kvm_ioctl_create_device(stru
        case KVM_DEV_TYPE_ARM_VGIC_V2:
                ops = &kvm_arm_vgic_v2_ops;
                break;
+ #endif
+ #ifdef CONFIG_S390
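+       /* s390 floating interrupt controller (FLIC) */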
+       case KVM_DEV_TYPE_FLIC:
+               ops = &kvm_flic_ops;
+               break;
  #endif
        default:
                return -ENODEV;