Merge tag 'kvm-ppc-next-4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Radim Krčmář <rkrcmar@redhat.com>

Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)

committer Radim Krčmář <rkrcmar@redhat.com>

Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)
author Radim Krčmář <rkrcmar@redhat.com>
Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)
committer Radim Krčmář <rkrcmar@redhat.com>
Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)
diff --combined Documentation/virtual/kvm/api.txt

index 70d3368adba9fbfaa143b9e1dd611e5d8cc11bb5,c6f9eebb79f2c053c43a484d2ec6e6586ccd265e..792fa8717d133e1aa7d6c73a8b948d53150e6d78
--- 1/Documentation/virtual/kvm/api.txt
--- 2/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@@ -1841,6 -1841,7 +1841,7 @@@ registers, find a list below
     PPC | KVM_REG_PPC_DBSR              | 32
     PPC   | KVM_REG_PPC_TIDR              | 64
     PPC   | KVM_REG_PPC_PSSCR             | 64
+   PPC   | KVM_REG_PPC_DEC_EXPIRY        | 64
     PPC   | KVM_REG_PPC_TM_GPR0           | 64
             ...
     PPC   | KVM_REG_PPC_TM_GPR31          | 64
@@@ -2901,19 -2902,14 +2902,19 @@@ userspace buffer and its length
   
   struct kvm_s390_irq_state {
         __u64 buf;
- -      __u32 flags;
+ +      __u32 flags;        /* will stay unused for compatibility reasons */
         __u32 len;
- -      __u32 reserved[4];
+ +      __u32 reserved[4];  /* will stay unused for compatibility reasons */
   };
   
   Userspace passes in the above struct and for each pending interrupt a
   struct kvm_s390_irq is copied to the provided buffer.
   
+ +The structure contains a flags and a reserved field for future extensions. As
+ +the kernel never checked for flags == 0 and QEMU never pre-zeroed flags and
+ +reserved, these fields cannot be used in the future without breaking
+ +compatibility.
+ +
   If -ENOBUFS is returned the buffer provided was too small and userspace
   may retry with a bigger buffer.
   
@@@ -2937,14 -2933,10 +2938,14 @@@ containing a struct kvm_s390_irq_state
   
   struct kvm_s390_irq_state {
         __u64 buf;
+ +      __u32 flags;        /* will stay unused for compatibility reasons */
         __u32 len;
- -      __u32 pad;
+ +      __u32 reserved[4];  /* will stay unused for compatibility reasons */
   };
   
+ +The restrictions for flags and reserved apply as well.
+ +(see KVM_S390_GET_IRQ_STATE)
+ +
   The userspace memory referenced by buf contains a struct kvm_s390_irq
   for each interrupt to be injected into the guest.
   If one of the interrupts could not be injected for some reason the
@@@ -3403,103 -3395,6 +3404,103 @@@ invalid, if invalid pages are written t
   or if no page table is present for the addresses (e.g. when using
   hugepages).
   
+ +4.109 KVM_PPC_GET_CPU_CHAR
+ +
+ +Capability: KVM_CAP_PPC_GET_CPU_CHAR
+ +Architectures: powerpc
+ +Type: vm ioctl
+ +Parameters: struct kvm_ppc_cpu_char (out)
+ +Returns: 0 on successful completion
+ +       -EFAULT if struct kvm_ppc_cpu_char cannot be written
+ +
+ +This ioctl gives userspace information about certain characteristics
+ +of the CPU relating to speculative execution of instructions and
+ +possible information leakage resulting from speculative execution (see
+ +CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754).  The information is
+ +returned in struct kvm_ppc_cpu_char, which looks like this:
+ +
+ +struct kvm_ppc_cpu_char {
+ +      __u64   character;              /* characteristics of the CPU */
+ +      __u64   behaviour;              /* recommended software behaviour */
+ +      __u64   character_mask;         /* valid bits in character */
+ +      __u64   behaviour_mask;         /* valid bits in behaviour */
+ +};
+ +
+ +For extensibility, the character_mask and behaviour_mask fields
+ +indicate which bits of character and behaviour have been filled in by
+ +the kernel.  If the set of defined bits is extended in future then
+ +userspace will be able to tell whether it is running on a kernel that
+ +knows about the new bits.
+ +
+ +The character field describes attributes of the CPU which can help
+ +with preventing inadvertent information disclosure - specifically,
+ +whether there is an instruction to flash-invalidate the L1 data cache
+ +(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set
+ +to a mode where entries can only be used by the thread that created
+ +them, whether the bcctr[l] instruction prevents speculation, and
+ +whether a speculation barrier instruction (ori 31,31,0) is provided.
+ +
+ +The behaviour field describes actions that software should take to
+ +prevent inadvertent information disclosure, and thus describes which
+ +vulnerabilities the hardware is subject to; specifically whether the
+ +L1 data cache should be flushed when returning to user mode from the
+ +kernel, and whether a speculation barrier should be placed between an
+ +array bounds check and the array access.
+ +
+ +These fields use the same bit definitions as the new
+ +H_GET_CPU_CHARACTERISTICS hypercall.
+ +
+ +4.110 KVM_MEMORY_ENCRYPT_OP
+ +
+ +Capability: basic
+ +Architectures: x86
+ +Type: system
+ +Parameters: an opaque platform specific structure (in/out)
+ +Returns: 0 on success; -1 on error
+ +
+ +If the platform supports creating encrypted VMs then this ioctl can be used
+ +for issuing platform-specific memory encryption commands to manage those
+ +encrypted VMs.
+ +
+ +Currently, this ioctl is used for issuing Secure Encrypted Virtualization
+ +(SEV) commands on AMD Processors. The SEV commands are defined in
+ +Documentation/virtual/kvm/amd-memory-encryption.txt.
+ +
+ +4.111 KVM_MEMORY_ENCRYPT_REG_REGION
+ +
+ +Capability: basic
+ +Architectures: x86
+ +Type: system
+ +Parameters: struct kvm_enc_region (in)
+ +Returns: 0 on success; -1 on error
+ +
+ +This ioctl can be used to register a guest memory region which may
+ +contain encrypted data (e.g. guest RAM, SMRAM etc).
+ +
+ +It is used in the SEV-enabled guest. When encryption is enabled, a guest
+ +memory region may contain encrypted data. The SEV memory encryption
+ +engine uses a tweak such that two identical plaintext pages, each at
+ +different locations, will have differing ciphertexts. So swapping or
+ +moving ciphertext of those pages will not result in plaintext being
+ +swapped. So relocating (or migrating) physical backing pages for the SEV
+ +guest will require some additional steps.
+ +
+ +Note: The current SEV key management spec does not provide commands to
+ +swap or migrate (move) ciphertext pages. Hence, for now we pin the guest
+ +memory region registered with the ioctl.
+ +
+ +4.112 KVM_MEMORY_ENCRYPT_UNREG_REGION
+ +
+ +Capability: basic
+ +Architectures: x86
+ +Type: system
+ +Parameters: struct kvm_enc_region (in)
+ +Returns: 0 on success; -1 on error
+ +
+ +This ioctl can be used to unregister the guest memory region registered
+ +with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above.
+ +
+ +
   5. The kvm_run structure
   ------------------------
   
diff --combined arch/powerpc/include/uapi/asm/kvm.h

index 637b7263cb867f09618cc2a5e7b525686a0ea267,8aaec831053af0bf4e6dcb6a8fda63737f65a93d..833ed9a16adfd03e0b6cb70adc19fe03055f7344
--- 1/arch/powerpc/include/uapi/asm/kvm.h
--- 2/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@@ -443,31 -443,6 +443,31 @@@ struct kvm_ppc_rmmu_info 
         __u32   ap_encodings[8];
   };
   
+ +/* For KVM_PPC_GET_CPU_CHAR */
+ +struct kvm_ppc_cpu_char {
+ +      __u64   character;              /* characteristics of the CPU */
+ +      __u64   behaviour;              /* recommended software behaviour */
+ +      __u64   character_mask;         /* valid bits in character */
+ +      __u64   behaviour_mask;         /* valid bits in behaviour */
+ +};
+ +
+ +/*
+ + * Values for character and character_mask.
+ + * These are identical to the values used by H_GET_CPU_CHARACTERISTICS.
+ + */
+ +#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31               (1ULL << 63)
+ +#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED    (1ULL << 62)
+ +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30      (1ULL << 61)
+ +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2      (1ULL << 60)
+ +#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV      (1ULL << 59)
+ +#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED     (1ULL << 58)
+ +#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF    (1ULL << 57)
+ +#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS      (1ULL << 56)
+ +
+ +#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY     (1ULL << 63)
+ +#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR                (1ULL << 62)
+ +#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR   (1ULL << 61)
+ +
   /* Per-vcpu XICS interrupt controller state */
   #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
   
@@@ -632,6 -607,8 +632,8 @@@
   #define KVM_REG_PPC_TIDR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
   #define KVM_REG_PPC_PSSCR     (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
   
+ #define KVM_REG_PPC_DEC_EXPIRY        (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+ 
   /* Transactional Memory checkpointed state:
    * This is all GPRs, all VSX regs and a subset of SPRs
    */
diff --combined arch/powerpc/kernel/asm-offsets.c

index f390d57cf2e1a711335bbd66cf7819e9dcd8442f,1672dffd94e2e809244c3e722e188d4e46d17163..ff6ce2fd7579434710bf6f1f5d5a15f66ff110ae
--- 1/arch/powerpc/kernel/asm-offsets.c
--- 2/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@@ -237,11 -237,6 +237,11 @@@ int main(void
         OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
         OFFSET(PACA_IN_MCE, paca_struct, in_mce);
         OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
+ +      OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
+ +      OFFSET(PACA_EXRFI, paca_struct, exrfi);
+ +      OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
+ +      OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+ +
   #endif
         OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
         OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
@@@ -519,6 -514,7 +519,7 @@@
         OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
         OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
         OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+       OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
         OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
         OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
         OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
@@@ -738,6 -734,9 +739,9 @@@
         DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
                                             arch.xive_cam_word));
         DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+       DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
+       DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
+       DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
   #endif
   
   #ifdef CONFIG_KVM_EXIT_TIMING
diff --combined arch/powerpc/kvm/book3s_hv_rmhandlers.S

index 9c61f736c75b2d0761ec4f9d2e385df75b6dc882,a7a20b85d8eb8dae89ea1d3776cc7782e7b0b925..b64f10a5f5e7d74d713689f43e186909fe7ead68
--- 1/arch/powerpc/kvm/book3s_hv_rmhandlers.S
--- 2/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@@ -79,7 -79,7 +79,7 @@@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline
         mtmsrd  r0,1            /* clear RI in MSR */
         mtsrr0  r5
         mtsrr1  r6
- -      RFI
+ +      RFI_TO_KERNEL
   
   kvmppc_call_hv_entry:
   BEGIN_FTR_SECTION
@@@ -199,7 -199,7 +199,7 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S
         mtmsrd  r6, 1                   /* Clear RI in MSR */
         mtsrr0  r8
         mtsrr1  r7
- -      RFI
+ +      RFI_TO_KERNEL
   
         /* Virtual-mode return */
   .Lvirt_return:
@@@ -617,13 -617,6 +617,6 @@@ kvmppc_hv_entry
         lbz     r0, KVM_RADIX(r9)
         cmpwi   cr7, r0, 0
   
-       /* Clear out SLB if hash */
-       bne     cr7, 2f
-       li      r6,0
-       slbmte  r6,r6
-       slbia
-       ptesync
- 2:
         /*
          * POWER7/POWER8 host -> guest partition switch code.
          * We don't have to lock against concurrent tlbies,
@@@ -738,19 -731,6 +731,6 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S
   10:   cmpdi   r4, 0
         beq     kvmppc_primary_no_guest
   kvmppc_got_guest:
- 
-       /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
-       lwz     r5,VCPU_SLB_MAX(r4)
-       cmpwi   r5,0
-       beq     9f
-       mtctr   r5
-       addi    r6,r4,VCPU_SLB
- 1:    ld      r8,VCPU_SLB_E(r6)
-       ld      r9,VCPU_SLB_V(r6)
-       slbmte  r9,r8
-       addi    r6,r6,VCPU_SLB_SIZE
-       bdnz    1b
- 9:
         /* Increment yield count if they have a VPA */
         ld      r3, VCPU_VPA(r4)
         cmpdi   r3, 0
@@@ -957,7 -937,6 +937,6 @@@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_
         mftb    r7
         subf    r3,r7,r8
         mtspr   SPRN_DEC,r3
-       std     r3,VCPU_DEC(r4)
   
         ld      r5, VCPU_SPRG0(r4)
         ld      r6, VCPU_SPRG1(r4)
@@@ -1018,6 -997,29 +997,29 @@@
         cmpdi   r3, 512         /* 1 microsecond */
         blt     hdec_soon
   
+       /* For hash guest, clear out and reload the SLB */
+       ld      r6, VCPU_KVM(r4)
+       lbz     r0, KVM_RADIX(r6)
+       cmpwi   r0, 0
+       bne     9f
+       li      r6, 0
+       slbmte  r6, r6
+       slbia
+       ptesync
+ 
+       /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
+       lwz     r5,VCPU_SLB_MAX(r4)
+       cmpwi   r5,0
+       beq     9f
+       mtctr   r5
+       addi    r6,r4,VCPU_SLB
+ 1:    ld      r8,VCPU_SLB_E(r6)
+       ld      r9,VCPU_SLB_V(r6)
+       slbmte  r9,r8
+       addi    r6,r6,VCPU_SLB_SIZE
+       bdnz    1b
+ 9:
+ 
   #ifdef CONFIG_KVM_XICS
         /* We are entering the guest on that thread, push VCPU to XIVE */
         ld      r10, HSTATE_XIVE_TIMA_PHYS(r13)
@@@ -1031,8 -1033,53 +1033,53 @@@
         li      r9, TM_QW1_OS + TM_WORD2
         stwcix  r11,r9,r10
         li      r9, 1
-       stw     r9, VCPU_XIVE_PUSHED(r4)
+       stb     r9, VCPU_XIVE_PUSHED(r4)
         eieio
+ 
+       /*
+        * We clear the irq_pending flag. There is a small chance of a
+        * race vs. the escalation interrupt happening on another
+        * processor setting it again, but the only consequence is to
+        * cause a spurious wakeup on the next H_CEDE which is not an
+        * issue.
+        */
+       li      r0,0
+       stb     r0, VCPU_IRQ_PENDING(r4)
+ 
+       /*
+        * In single escalation mode, if the escalation interrupt is
+        * on, we mask it.
+        */
+       lbz     r0, VCPU_XIVE_ESC_ON(r4)
+       cmpwi   r0,0
+       beq     1f
+       ld      r10, VCPU_XIVE_ESC_RADDR(r4)
+       li      r9, XIVE_ESB_SET_PQ_01
+       ldcix   r0, r10, r9
+       sync
+ 
+       /* We have a possible subtle race here: The escalation interrupt might
+        * have fired and be on its way to the host queue while we mask it,
+        * and if we unmask it early enough (re-cede right away), there is
+        * a theoretical possibility that it fires again, thus landing in the
+        * target queue more than once which is a big no-no.
+        *
+        * Fortunately, solving this is rather easy. If the above load setting
+        * PQ to 01 returns a previous value where P is set, then we know the
+        * escalation interrupt is somewhere on its way to the host. In that
+        * case we simply don't clear the xive_esc_on flag below. It will be
+        * eventually cleared by the handler for the escalation interrupt.
+        *
+        * Then, when doing a cede, we check that flag again before re-enabling
+        * the escalation interrupt, and if set, we abort the cede.
+        */
+       andi.   r0, r0, XIVE_ESB_VAL_P
+       bne-    1f
+ 
+       /* Now P is 0, we can clear the flag */
+       li      r0, 0
+       stb     r0, VCPU_XIVE_ESC_ON(r4)
+ 1:
   no_xive:
   #endif /* CONFIG_KVM_XICS */
   
@@@ -1167,7 -1214,8 +1214,7 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300
   
         ld      r0, VCPU_GPR(R0)(r4)
         ld      r4, VCPU_GPR(R4)(r4)
- -
- -      hrfid
+ +      HRFI_TO_GUEST
         b       .
   
   secondary_too_late:
@@@ -1193,7 -1241,7 +1240,7 @@@ hdec_soon
         addi    r3, r4, VCPU_TB_RMEXIT
         bl      kvmhv_accumulate_time
   #endif
-       b       guest_exit_cont
+       b       guest_bypass
   
   /******************************************************************************
    *                                                                            *
@@@ -1423,15 -1471,35 +1470,35 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300
         blt     deliver_guest_interrupt
   
   guest_exit_cont:              /* r9 = vcpu, r12 = trap, r13 = paca */
+       /* Save more register state  */
+       mfdar   r6
+       mfdsisr r7
+       std     r6, VCPU_DAR(r9)
+       stw     r7, VCPU_DSISR(r9)
+       /* don't overwrite fault_dar/fault_dsisr if HDSI */
+       cmpwi   r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
+       beq     mc_cont
+       std     r6, VCPU_FAULT_DAR(r9)
+       stw     r7, VCPU_FAULT_DSISR(r9)
+ 
+       /* See if it is a machine check */
+       cmpwi   r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+       beq     machine_check_realmode
+ mc_cont:
+ #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r9, VCPU_TB_RMEXIT
+       mr      r4, r9
+       bl      kvmhv_accumulate_time
+ #endif
   #ifdef CONFIG_KVM_XICS
         /* We are exiting, pull the VP from the XIVE */
-       lwz     r0, VCPU_XIVE_PUSHED(r9)
+       lbz     r0, VCPU_XIVE_PUSHED(r9)
         cmpwi   cr0, r0, 0
         beq     1f
         li      r7, TM_SPC_PULL_OS_CTX
         li      r6, TM_QW1_OS
         mfmsr   r0
-       andi.   r0, r0, MSR_IR          /* in real mode? */
+       andi.   r0, r0, MSR_DR          /* in real mode? */
         beq     2f
         ld      r10, HSTATE_XIVE_TIMA_VIRT(r13)
         cmpldi  cr0, r10, 0
@@@ -1454,33 -1522,42 +1521,42 @@@
         /* Fixup some of the state for the next load */
         li      r10, 0
         li      r0, 0xff
-       stw     r10, VCPU_XIVE_PUSHED(r9)
+       stb     r10, VCPU_XIVE_PUSHED(r9)
         stb     r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
         stb     r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
         eieio
   1:
   #endif /* CONFIG_KVM_XICS */
-       /* Save more register state  */
-       mfdar   r6
-       mfdsisr r7
-       std     r6, VCPU_DAR(r9)
-       stw     r7, VCPU_DSISR(r9)
-       /* don't overwrite fault_dar/fault_dsisr if HDSI */
-       cmpwi   r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
-       beq     mc_cont
-       std     r6, VCPU_FAULT_DAR(r9)
-       stw     r7, VCPU_FAULT_DSISR(r9)
   
-       /* See if it is a machine check */
-       cmpwi   r12, BOOK3S_INTERRUPT_MACHINE_CHECK
-       beq     machine_check_realmode
- mc_cont:
- #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
-       addi    r3, r9, VCPU_TB_RMEXIT
-       mr      r4, r9
-       bl      kvmhv_accumulate_time
- #endif
+       /* For hash guest, read the guest SLB and save it away */
+       ld      r5, VCPU_KVM(r9)
+       lbz     r0, KVM_RADIX(r5)
+       li      r5, 0
+       cmpwi   r0, 0
+       bne     3f                      /* for radix, save 0 entries */
+       lwz     r0,VCPU_SLB_NR(r9)      /* number of entries in SLB */
+       mtctr   r0
+       li      r6,0
+       addi    r7,r9,VCPU_SLB
+ 1:    slbmfee r8,r6
+       andis.  r0,r8,SLB_ESID_V@h
+       beq     2f
+       add     r8,r8,r6                /* put index in */
+       slbmfev r3,r6
+       std     r8,VCPU_SLB_E(r7)
+       std     r3,VCPU_SLB_V(r7)
+       addi    r7,r7,VCPU_SLB_SIZE
+       addi    r5,r5,1
+ 2:    addi    r6,r6,1
+       bdnz    1b
+       /* Finally clear out the SLB */
+       li      r0,0
+       slbmte  r0,r0
+       slbia
+       ptesync
+ 3:    stw     r5,VCPU_SLB_MAX(r9)
   
+ guest_bypass:
         mr      r3, r12
         /* Increment exit count, poke other threads to exit */
         bl      kvmhv_commence_exit
@@@ -1501,31 -1578,6 +1577,6 @@@
         ori     r6,r6,1
         mtspr   SPRN_CTRLT,r6
   4:
-       /* Check if we are running hash or radix and store it in cr2 */
-       ld      r5, VCPU_KVM(r9)
-       lbz     r0, KVM_RADIX(r5)
-       cmpwi   cr2,r0,0
- 
-       /* Read the guest SLB and save it away */
-       li      r5, 0
-       bne     cr2, 3f                 /* for radix, save 0 entries */
-       lwz     r0,VCPU_SLB_NR(r9)      /* number of entries in SLB */
-       mtctr   r0
-       li      r6,0
-       addi    r7,r9,VCPU_SLB
- 1:    slbmfee r8,r6
-       andis.  r0,r8,SLB_ESID_V@h
-       beq     2f
-       add     r8,r8,r6                /* put index in */
-       slbmfev r3,r6
-       std     r8,VCPU_SLB_E(r7)
-       std     r3,VCPU_SLB_V(r7)
-       addi    r7,r7,VCPU_SLB_SIZE
-       addi    r5,r5,1
- 2:    addi    r6,r6,1
-       bdnz    1b
- 3:    stw     r5,VCPU_SLB_MAX(r9)
- 
         /*
          * Save the guest PURR/SPURR
          */
@@@ -1803,7 -1855,7 +1854,7 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300
         ld      r5, VCPU_KVM(r9)
         lbz     r0, KVM_RADIX(r5)
         cmpwi   cr2, r0, 0
-       beq     cr2, 3f
+       beq     cr2, 4f
   
         /* Radix: Handle the case where the guest used an illegal PID */
         LOAD_REG_ADDR(r4, mmu_base_pid)
@@@ -1839,15 -1891,9 +1890,9 @@@
   BEGIN_FTR_SECTION
         PPC_INVALIDATE_ERAT
   END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
-       b       4f
+ 4:
   #endif /* CONFIG_PPC_RADIX_MMU */
   
-       /* Hash: clear out SLB */
- 3:    li      r5,0
-       slbmte  r5,r5
-       slbia
-       ptesync
- 4:
         /*
          * POWER7/POWER8 guest -> host partition switch code.
          * We don't have to lock against tlbies but we do
@@@ -1908,16 -1954,17 +1953,17 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S
         bne     27f
         bl      kvmppc_realmode_hmi_handler
         nop
+       cmpdi   r3, 0
         li      r12, BOOK3S_INTERRUPT_HMI
         /*
-        * At this point kvmppc_realmode_hmi_handler would have resync-ed
-        * the TB. Hence it is not required to subtract guest timebase
-        * offset from timebase. So, skip it.
+        * At this point kvmppc_realmode_hmi_handler may have resync-ed
+        * the TB, and if it has, we must not subtract the guest timebase
+        * offset from the timebase. So, skip it.
          *
          * Also, do not call kvmppc_subcore_exit_guest() because it has
          * been invoked as part of kvmppc_realmode_hmi_handler().
          */
-       b       30f
+       beq     30f
   
   27:
         /* Subtract timebase offset from timebase */
@@@ -2744,7 -2791,32 +2790,32 @@@ kvm_cede_prodded
         /* we've ceded but we want to give control to the host */
   kvm_cede_exit:
         ld      r9, HSTATE_KVM_VCPU(r13)
-       b       guest_exit_cont
+ #ifdef CONFIG_KVM_XICS
+       /* Abort if we still have a pending escalation */
+       lbz     r5, VCPU_XIVE_ESC_ON(r9)
+       cmpwi   r5, 0
+       beq     1f
+       li      r0, 0
+       stb     r0, VCPU_CEDED(r9)
+ 1:    /* Enable XIVE escalation */
+       li      r5, XIVE_ESB_SET_PQ_00
+       mfmsr   r0
+       andi.   r0, r0, MSR_DR          /* in real mode? */
+       beq     1f
+       ld      r10, VCPU_XIVE_ESC_VADDR(r9)
+       cmpdi   r10, 0
+       beq     3f
+       ldx     r0, r10, r5
+       b       2f
+ 1:    ld      r10, VCPU_XIVE_ESC_RADDR(r9)
+       cmpdi   r10, 0
+       beq     3f
+       ldcix   r0, r10, r5
+ 2:    sync
+       li      r0, 1
+       stb     r0, VCPU_XIVE_ESC_ON(r9)
+ #endif /* CONFIG_KVM_XICS */
+ 3:    b       guest_exit_cont
   
         /* Try to handle a machine check in real mode */
   machine_check_realmode:
@@@ -3319,7 -3391,7 +3390,7 @@@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_
         ld      r4, PACAKMSR(r13)
         mtspr   SPRN_SRR0, r3
         mtspr   SPRN_SRR1, r4
- -      rfid
+ +      RFI_TO_KERNEL
   9:    addi    r3, r1, STACK_FRAME_OVERHEAD
         bl      kvmppc_bad_interrupt
         b       9b
diff --combined arch/powerpc/kvm/book3s_xive.c

index 0d750d274c4e21a3324eb3505bbd73c86a58cdc9,7a047bc88f11032eabcf2ca7f74b8c411e95d68d..badfdbb857a28cfcf4a25ecc0144639477775efe
--- 1/arch/powerpc/kvm/book3s_xive.c
--- 2/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@@ -84,12 -84,22 +84,22 @@@ static irqreturn_t xive_esc_irq(int irq
   {
         struct kvm_vcpu *vcpu = data;
   
-       /* We use the existing H_PROD mechanism to wake up the target */
-       vcpu->arch.prodded = 1;
+       vcpu->arch.irq_pending = 1;
         smp_mb();
         if (vcpu->arch.ceded)
                 kvmppc_fast_vcpu_kick(vcpu);
   
+       /* Since we have the no-EOI flag, the interrupt is effectively
+        * disabled now. Clearing xive_esc_on means we won't bother
+        * doing so on the next entry.
+        *
+        * This also allows the entry code to know that if a PQ combination
+        * of 10 is observed while xive_esc_on is true, it means the queue
+        * contains an unprocessed escalation interrupt. We don't make use of
+        * that knowledge today but might (see comment in book3s_hv_rmhandlers.S)
+        */
+       vcpu->arch.xive_esc_on = false;
+ 
         return IRQ_HANDLED;
   }
   
@@@ -112,19 -122,21 +122,21 @@@ static int xive_attach_escalation(struc
                 return -EIO;
         }
   
-       /*
-        * Future improvement: start with them disabled
-        * and handle DD2 and later scheme of merged escalation
-        * interrupts
-        */
-       name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
-                        vcpu->kvm->arch.lpid, xc->server_num, prio);
+       if (xc->xive->single_escalation)
+               name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+                                vcpu->kvm->arch.lpid, xc->server_num);
+       else
+               name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+                                vcpu->kvm->arch.lpid, xc->server_num, prio);
         if (!name) {
                 pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
                        prio, xc->server_num);
                 rc = -ENOMEM;
                 goto error;
         }
+ 
+       pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+ 
         rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
                          IRQF_NO_THREAD, name, vcpu);
         if (rc) {
@@@ -133,6 -145,25 +145,25 @@@
                 goto error;
         }
         xc->esc_virq_names[prio] = name;
+ 
+       /* In single escalation mode, we grab the ESB MMIO of the
+        * interrupt and mask it. Also populate the VCPU v/raddr
+        * of the ESB page for use by asm entry/exit code. Finally
+        * set the XIVE_IRQ_NO_EOI flag which will prevent the
+        * core code from performing an EOI on the escalation
+        * interrupt, thus leaving it effectively masked after
+        * it fires once.
+        */
+       if (xc->xive->single_escalation) {
+               struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
+               struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+ 
+               xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+               vcpu->arch.xive_esc_raddr = xd->eoi_page;
+               vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
+               xd->flags |= XIVE_IRQ_NO_EOI;
+       }
+ 
         return 0;
   error:
         irq_dispose_mapping(xc->esc_virq[prio]);
@@@ -191,12 -222,12 +222,12 @@@ static int xive_check_provisioning(stru
   
         pr_devel("Provisioning prio... %d\n", prio);
   
-       /* Provision each VCPU and enable escalations */
+       /* Provision each VCPU and enable escalations if needed */
         kvm_for_each_vcpu(i, vcpu, kvm) {
                 if (!vcpu->arch.xive_vcpu)
                         continue;
                 rc = xive_provision_queue(vcpu, prio);
-               if (rc == 0)
+               if (rc == 0 && !xive->single_escalation)
                         xive_attach_escalation(vcpu, prio);
                 if (rc)
                         return rc;
@@@ -725,8 -756,7 +756,8 @@@ u64 kvmppc_xive_get_icp(struct kvm_vcp
   
         /* Return the per-cpu state for state saving/migration */
         return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
- -             (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+ +             (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+ +             (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
   }
   
   int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@@ -1082,6 -1112,7 +1113,7 @@@ int kvmppc_xive_connect_vcpu(struct kvm
         /* Allocate IPI */
         xc->vp_ipi = xive_native_alloc_irq();
         if (!xc->vp_ipi) {
+               pr_err("Failed to allocate xive irq for VCPU IPI\n");
                 r = -EIO;
                 goto bail;
         }
@@@ -1091,19 -1122,34 +1123,34 @@@
         if (r)
                 goto bail;
   
+       /*
+        * Enable the VP first as the single escalation mode will
+        * affect escalation interrupts numbering
+        */
+       r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+       if (r) {
+               pr_err("Failed to enable VP in OPAL, err %d\n", r);
+               goto bail;
+       }
+ 
         /*
          * Initialize queues. Initially we set them all for no queueing
          * and we enable escalation for queue 0 only which we'll use for
          * our mfrr change notifications. If the VCPU is hot-plugged, we
-        * do handle provisioning however.
+        * do handle provisioning however based on the existing "map"
+        * of enabled queues.
          */
         for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                 struct xive_q *q = &xc->queues[i];
   
+               /* Single escalation, no queue 7 */
+               if (i == 7 && xive->single_escalation)
+                       break;
+ 
                 /* Is queue already enabled ? Provision it */
                 if (xive->qmap & (1 << i)) {
                         r = xive_provision_queue(vcpu, i);
-                       if (r == 0)
+                       if (r == 0 && !xive->single_escalation)
                                 xive_attach_escalation(vcpu, i);
                         if (r)
                                 goto bail;
@@@ -1123,11 -1169,6 +1170,6 @@@
         if (r)
                 goto bail;
   
-       /* Enable the VP */
-       r = xive_native_enable_vp(xc->vp_id);
-       if (r)
-               goto bail;
- 
         /* Route the IPI */
         r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
         if (!r)
@@@ -1474,6 -1515,7 +1516,7 @@@ static int xive_set_source(struct kvmpp
   
         pr_devel("  val=0x016%llx (server=0x%x, guest_prio=%d)\n",
                  val, server, guest_prio);
+ 
         /*
          * If the source doesn't already have an IPI, allocate
          * one and get the corresponding data
@@@ -1559,7 -1601,7 +1602,7 @@@
   
         /*
          * Restore P and Q. If the interrupt was pending, we
- -       * force both P and Q, which will trigger a resend.
+ +       * force Q and !P, which will trigger a resend.
          *
          * That means that a guest that had both an interrupt
          * pending (queued) and Q set will restore with only
@@@ -1567,7 -1609,7 +1610,7 @@@
          * is perfectly fine as coalescing interrupts that haven't
          * been presented yet is always allowed.
          */
- -      if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+ +      if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
                 state->old_p = true;
         if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
                 state->old_q = true;
@@@ -1762,6 -1804,8 +1805,8 @@@ static int kvmppc_xive_create(struct kv
         if (xive->vp_base == XIVE_INVALID_VP)
                 ret = -ENOMEM;
   
+       xive->single_escalation = xive_native_has_single_escalation();
+ 
         if (ret) {
                 kfree(xive);
                 return ret;
@@@ -1795,6 -1839,7 +1840,7 @@@ static int xive_debug_show(struct seq_f
   
         kvm_for_each_vcpu(i, vcpu, kvm) {
                 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+               unsigned int i;
   
                 if (!xc)
                         continue;
@@@ -1804,6 -1849,33 +1850,33 @@@
                            xc->server_num, xc->cppr, xc->hw_cppr,
                            xc->mfrr, xc->pending,
                            xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+               for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+                       struct xive_q *q = &xc->queues[i];
+                       u32 i0, i1, idx;
+ 
+                       if (!q->qpage && !xc->esc_virq[i])
+                               continue;
+ 
+                       seq_printf(m, " [q%d]: ", i);
+ 
+                       if (q->qpage) {
+                               idx = q->idx;
+                               i0 = be32_to_cpup(q->qpage + idx);
+                               idx = (idx + 1) & q->msk;
+                               i1 = be32_to_cpup(q->qpage + idx);
+                               seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
+                       }
+                       if (xc->esc_virq[i]) {
+                               struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+                               struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+                               u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+                               seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+                                          (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+                                          (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+                                          xc->esc_virq[i], pq, xd->eoi_page);
+                               seq_printf(m, "\n");
+                       }
+               }
   
                 t_rm_h_xirr += xc->stat_rm_h_xirr;
                 t_rm_h_ipoll += xc->stat_rm_h_ipoll;
diff --combined arch/powerpc/kvm/powerpc.c

index 545a230f675f652702193eb6089e6afe791ece46,77eb25abc60160a62963c06b4564e2aa1a79dbc7..748562ec9a0425f19f226da9e14833da0602bab0
--- 1/arch/powerpc/kvm/powerpc.c
--- 2/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@@ -39,10 -39,6 +39,10 @@@
   #include <asm/iommu.h>
   #include <asm/switch_to.h>
   #include <asm/xive.h>
+ +#ifdef CONFIG_PPC_PSERIES
+ +#include <asm/hvcall.h>
+ +#include <asm/plpar_wrappers.h>
+ +#endif
   
   #include "timing.h"
   #include "irq.h"
@@@ -552,7 -548,6 +552,7 @@@ int kvm_vm_ioctl_check_extension(struc
   #ifdef CONFIG_KVM_XICS
         case KVM_CAP_IRQ_XICS:
   #endif
+ +      case KVM_CAP_PPC_GET_CPU_CHAR:
                 r = 1;
                 break;
   
@@@ -763,7 -758,7 +763,7 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu 
   
         hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
         vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
-       vcpu->arch.dec_expires = ~(u64)0;
+       vcpu->arch.dec_expires = get_tb();
   
   #ifdef CONFIG_KVM_EXIT_TIMING
         mutex_init(&vcpu->arch.exit_timing_lock);
@@@ -1106,11 -1101,9 +1106,9 @@@ int kvmppc_handle_vsx_load(struct kvm_r
   {
         enum emulation_result emulated = EMULATE_DONE;
   
-       /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
-       if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
-               (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+       /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+       if (vcpu->arch.mmio_vsx_copy_nums > 4)
                 return EMULATE_FAIL;
-       }
   
         while (vcpu->arch.mmio_vsx_copy_nums) {
                 emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
@@@ -1252,11 -1245,9 +1250,9 @@@ int kvmppc_handle_vsx_store(struct kvm_
   
         vcpu->arch.io_gpr = rs;
   
-       /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
-       if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
-               (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+       /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+       if (vcpu->arch.mmio_vsx_copy_nums > 4)
                 return EMULATE_FAIL;
-       }
   
         while (vcpu->arch.mmio_vsx_copy_nums) {
                 if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
@@@ -1413,8 -1404,6 +1409,8 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
   {
         int r;
   
+ +      vcpu_load(vcpu);
+ +
         if (vcpu->mmio_needed) {
                 vcpu->mmio_needed = 0;
                 if (!vcpu->mmio_is_write)
@@@ -1429,7 -1418,7 +1425,7 @@@
                         r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run);
                         if (r == RESUME_HOST) {
                                 vcpu->mmio_needed = 1;
- -                              return r;
+ +                              goto out;
                         }
                 }
   #endif
@@@ -1463,8 -1452,6 +1459,8 @@@
   
         kvm_sigset_deactivate(vcpu);
   
+ +out:
+ +      vcpu_put(vcpu);
         return r;
   }
   
@@@ -1612,31 -1599,23 +1608,31 @@@ int kvm_arch_vcpu_ioctl_set_mpstate(str
         return -EINVAL;
   }
   
- -long kvm_arch_vcpu_ioctl(struct file *filp,
- -                         unsigned int ioctl, unsigned long arg)
+ +long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ +                             unsigned int ioctl, unsigned long arg)
   {
         struct kvm_vcpu *vcpu = filp->private_data;
         void __user *argp = (void __user *)arg;
- -      long r;
   
- -      switch (ioctl) {
- -      case KVM_INTERRUPT: {
+ +      if (ioctl == KVM_INTERRUPT) {
                 struct kvm_interrupt irq;
- -              r = -EFAULT;
                 if (copy_from_user(&irq, argp, sizeof(irq)))
- -                      goto out;
- -              r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
- -              goto out;
+ +                      return -EFAULT;
+ +              return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
         }
+ +      return -ENOIOCTLCMD;
+ +}
+ +
+ +long kvm_arch_vcpu_ioctl(struct file *filp,
+ +                         unsigned int ioctl, unsigned long arg)
+ +{
+ +      struct kvm_vcpu *vcpu = filp->private_data;
+ +      void __user *argp = (void __user *)arg;
+ +      long r;
+ +
+ +      vcpu_load(vcpu);
   
+ +      switch (ioctl) {
         case KVM_ENABLE_CAP:
         {
                 struct kvm_enable_cap cap;
@@@ -1676,7 -1655,6 +1672,7 @@@
         }
   
   out:
+ +      vcpu_put(vcpu);
         return r;
   }
   
@@@ -1777,124 -1755,6 +1773,124 @@@ static int kvm_vm_ioctl_enable_cap(stru
         return r;
   }
   
+ +#ifdef CONFIG_PPC_BOOK3S_64
+ +/*
+ + * These functions check whether the underlying hardware is safe
+ + * against attacks based on observing the effects of speculatively
+ + * executed instructions, and whether it supplies instructions for
+ + * use in workarounds.  The information comes from firmware, either
+ + * via the device tree on powernv platforms or from an hcall on
+ + * pseries platforms.
+ + */
+ +#ifdef CONFIG_PPC_PSERIES
+ +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+ +{
+ +      struct h_cpu_char_result c;
+ +      unsigned long rc;
+ +
+ +      if (!machine_is(pseries))
+ +              return -ENOTTY;
+ +
+ +      rc = plpar_get_cpu_characteristics(&c);
+ +      if (rc == H_SUCCESS) {
+ +              cp->character = c.character;
+ +              cp->behaviour = c.behaviour;
+ +              cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 |
+ +                      KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED |
+ +                      KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 |
+ +                      KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 |
+ +                      KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
+ +                      KVM_PPC_CPU_CHAR_BR_HINT_HONOURED |
+ +                      KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF |
+ +                      KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+ +              cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
+ +                      KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
+ +                      KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+ +      }
+ +      return 0;
+ +}
+ +#else
+ +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+ +{
+ +      return -ENOTTY;
+ +}
+ +#endif
+ +
+ +static inline bool have_fw_feat(struct device_node *fw_features,
+ +                              const char *state, const char *name)
+ +{
+ +      struct device_node *np;
+ +      bool r = false;
+ +
+ +      np = of_get_child_by_name(fw_features, name);
+ +      if (np) {
+ +              r = of_property_read_bool(np, state);
+ +              of_node_put(np);
+ +      }
+ +      return r;
+ +}
+ +
+ +static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+ +{
+ +      struct device_node *np, *fw_features;
+ +      int r;
+ +
+ +      memset(cp, 0, sizeof(*cp));
+ +      r = pseries_get_cpu_char(cp);
+ +      if (r != -ENOTTY)
+ +              return r;
+ +
+ +      np = of_find_node_by_name(NULL, "ibm,opal");
+ +      if (np) {
+ +              fw_features = of_get_child_by_name(np, "fw-features");
+ +              of_node_put(np);
+ +              if (!fw_features)
+ +                      return 0;
+ +              if (have_fw_feat(fw_features, "enabled",
+ +                               "inst-spec-barrier-ori31,31,0"))
+ +                      cp->character |= KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31;
+ +              if (have_fw_feat(fw_features, "enabled",
+ +                               "fw-bcctrl-serialized"))
+ +                      cp->character |= KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED;
+ +              if (have_fw_feat(fw_features, "enabled",
+ +                               "inst-l1d-flush-ori30,30,0"))
+ +                      cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30;
+ +              if (have_fw_feat(fw_features, "enabled",
+ +                               "inst-l1d-flush-trig2"))
+ +                      cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2;
+ +              if (have_fw_feat(fw_features, "enabled",
+ +                               "fw-l1d-thread-split"))
+ +                      cp->character |= KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV;
+ +              if (have_fw_feat(fw_features, "enabled",
+ +                               "fw-count-cache-disabled"))
+ +                      cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+ +              cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 |
+ +                      KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED |
+ +                      KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 |
+ +                      KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 |
+ +                      KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
+ +                      KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+ +
+ +              if (have_fw_feat(fw_features, "enabled",
+ +                               "speculation-policy-favor-security"))
+ +                      cp->behaviour |= KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY;
+ +              if (!have_fw_feat(fw_features, "disabled",
+ +                                "needs-l1d-flush-msr-pr-0-to-1"))
+ +                      cp->behaviour |= KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR;
+ +              if (!have_fw_feat(fw_features, "disabled",
+ +                                "needs-spec-barrier-for-bound-checks"))
+ +                      cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+ +              cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
+ +                      KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
+ +                      KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+ +
+ +              of_node_put(fw_features);
+ +      }
+ +
+ +      return 0;
+ +}
+ +#endif
+ +
   long kvm_arch_vm_ioctl(struct file *filp,
                          unsigned int ioctl, unsigned long arg)
   {
@@@ -1997,14 -1857,6 +1993,14 @@@
                         r = -EFAULT;
                 break;
         }
+ +      case KVM_PPC_GET_CPU_CHAR: {
+ +              struct kvm_ppc_cpu_char cpuchar;
+ +
+ +              r = kvmppc_get_cpu_char(&cpuchar);
+ +              if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar)))
+ +                      r = -EFAULT;
+ +              break;
+ +      }
         default: {
                 struct kvm *kvm = filp->private_data;
                 r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
author	Radim Krčmář <rkrcmar@redhat.com>
	Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)
committer	Radim Krčmář <rkrcmar@redhat.com>
	Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)
		1	2
Documentation/virtual/kvm/api.txt	patch | diff1 | diff2 | blob | history
arch/powerpc/include/uapi/asm/kvm.h	patch | diff1 | diff2 | blob | history
arch/powerpc/kernel/asm-offsets.c	patch | diff1 | diff2 | blob | history
arch/powerpc/kvm/book3s_hv_rmhandlers.S	patch | diff1 | diff2 | blob | history
arch/powerpc/kvm/book3s_xive.c	patch | diff1 | diff2 | blob | history
arch/powerpc/kvm/powerpc.c	patch | diff1 | diff2 | blob | history