Merge tag 'kvm-4.16-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 10 Feb 2018 21:16:35 +0000 (13:16 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 10 Feb 2018 21:16:35 +0000 (13:16 -0800)
Pull KVM updates from Radim Krčmář:
 "ARM:

   - icache invalidation optimizations, improving VM startup time

   - support for forwarded level-triggered interrupts, improving
     performance for timers and passthrough platform devices

   - a small fix for power-management notifiers, and some cosmetic
     changes

  PPC:

   - add MMIO emulation for vector loads and stores

   - allow HPT guests to run on a radix host on POWER9 v2.2 CPUs without
     requiring the complex thread synchronization of older CPU versions

   - improve the handling of escalation interrupts with the XIVE
     interrupt controller

   - support decrement register migration

   - various cleanups and bugfixes.

  s390:

   - Cornelia Huck passed maintainership to Janosch Frank

   - exitless interrupts for emulated devices

   - cleanup of cpuflag handling

   - kvm_stat counter improvements

   - VSIE improvements

   - mm cleanup

  x86:

   - hypervisor part of SEV

   - UMIP, RDPID, and MSR_SMI_COUNT emulation

   - paravirtualized TLB shootdown using the new KVM_VCPU_PREEMPTED bit
     (a guest-side sketch follows the quoted log)

   - allow guests to see TOPOEXT, GFNI, VAES, VPCLMULQDQ, and more
     AVX512 features

   - show vcpu id in its anonymous inode name

   - many fixes and cleanups

   - per-VCPU MSR bitmaps (already merged through x86/pti branch)

   - stable KVM clock when nesting on Hyper-V (merged through
     x86/hyperv)"
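
The paravirtualized TLB shootdown item relies on a hint the host already
publishes in each vCPU's steal-time area: KVM_VCPU_PREEMPTED is set while
a vCPU is descheduled, so the guest's flush path can mark the remote vCPU
with KVM_VCPU_FLUSH_TLB and skip the IPI, leaving the flush for the host
to perform on the next vCPU entry. A minimal guest-side sketch along the
lines of kvm_flush_tlb_others(), assuming the 4.16 names:

    /* Sketch; assumes the per-CPU steal_time area from arch/x86/kernel/kvm.c.
     * For each flush target: if its vCPU is preempted, queue a flush-on-entry
     * request and drop that CPU from the IPI mask. */
    static void pv_queue_flush_or_ipi(int cpu, struct cpumask *flushmask)
    {
            struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
            u8 state = READ_ONCE(src->preempted);

            if (!(state & KVM_VCPU_PREEMPTED))
                    return;  /* vCPU is running; it will take the IPI */

            /* cmpxchg because the vCPU may be rescheduled concurrently */
            if (try_cmpxchg(&src->preempted, &state,
                            state | KVM_VCPU_FLUSH_TLB))
                    __cpumask_clear_cpu(cpu, flushmask);
    }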

* tag 'kvm-4.16-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (197 commits)
  KVM: PPC: Book3S: Add MMIO emulation for VMX instructions
  KVM: PPC: Book3S HV: Branch inside feature section
  KVM: PPC: Book3S HV: Make HPT resizing work on POWER9
  KVM: PPC: Book3S HV: Fix handling of secondary HPTEG in HPT resizing code
  KVM: PPC: Book3S PR: Fix broken select due to misspelling
  KVM: x86: don't forget vcpu_put() in kvm_arch_vcpu_ioctl_set_sregs()
  KVM: PPC: Book3S PR: Fix svcpu copying with preemption enabled
  KVM: PPC: Book3S HV: Drop locks before reading guest memory
  kvm: x86: remove efer_reload entry in kvm_vcpu_stat
  KVM: x86: AMD Processor Topology Information
  x86/kvm/vmx: do not use vm-exit instruction length for fast MMIO when running nested
  kvm: embed vcpu id to dentry of vcpu anon inode
  kvm: Map PFN-type memory regions as writable (if possible)
  x86/kvm: Make it compile on 32bit and with HYPYERVISOR_GUEST=n
  KVM: arm/arm64: Fixup userspace irqchip static key optimization
  KVM: arm/arm64: Fix userspace_irqchip_in_use counting
  KVM: arm/arm64: Fix incorrect timer_is_pending logic
  MAINTAINERS: update KVM/s390 maintainers
  MAINTAINERS: add Halil as additional vfio-ccw maintainer
  MAINTAINERS: add David as a reviewer for KVM/s390
  ...

28 files changed:
MAINTAINERS
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_mmu.h
arch/arm64/include/asm/assembler.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/pgtable-prot.h
arch/arm64/kvm/hyp/switch.c
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/opal-api.h
arch/powerpc/include/asm/ppc-opcode.h
arch/powerpc/include/asm/xive.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_xive.c
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/hyperv/hv_init.c
arch/x86/include/asm/mshyperv.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
virt/kvm/arm/arm.c
virt/kvm/arm/mmu.c
virt/kvm/kvm_main.c

diff --cc MAINTAINERS
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc arch/arm64/include/asm/pgtable-prot.h
index 2db84df5eb422497d32f039bf5fa17d6c444a775,4e12dabd342b0676f75b25ff6fc2006f5382647e..108ecad7acc5617aa3c6c7573ba578b9e485f78d
  #define PROT_SECT_NORMAL      (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
  #define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
  
 -#define _PAGE_DEFAULT         (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
 +#define _PAGE_DEFAULT         (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
 +#define _HYP_PAGE_DEFAULT     _PAGE_DEFAULT
  
 -#define PAGE_KERNEL           __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 -#define PAGE_KERNEL_RO                __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 -#define PAGE_KERNEL_ROX               __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 -#define PAGE_KERNEL_EXEC      __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 -#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 +#define PAGE_KERNEL           __pgprot(PROT_NORMAL)
 +#define PAGE_KERNEL_RO                __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
 +#define PAGE_KERNEL_ROX               __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
 +#define PAGE_KERNEL_EXEC      __pgprot(PROT_NORMAL & ~PTE_PXN)
 +#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
  
 -#define PAGE_HYP              __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
 -#define PAGE_HYP_EXEC         __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
 -#define PAGE_HYP_RO           __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
 +#define PAGE_HYP              __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
 +#define PAGE_HYP_EXEC         __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
 +#define PAGE_HYP_RO           __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
  #define PAGE_HYP_DEVICE               __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
  
- #define PAGE_S2                       __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
- #define PAGE_S2_DEVICE                __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
 -#define PAGE_S2                       __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
 -#define PAGE_S2_DEVICE                __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
++#define PAGE_S2                       __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
++#define PAGE_S2_DEVICE                __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
  
 -#define PAGE_NONE             __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
 +#define PAGE_NONE             __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
  #define PAGE_SHARED           __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
  #define PAGE_SHARED_EXEC      __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
  #define PAGE_READONLY         __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
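
In the resolved PAGE_S2/PAGE_S2_DEVICE lines, both parents' changes
survive: the rename to _PROT_DEFAULT (from the PTE_NG/KPTI rework) and
the new PTE_S2_XN bit (from the KVM icache series), so stage-2 mappings
are now execute-never by default. KVM then grants execute permission
only after the icache has been maintained for a page; a sketch of that
helper's shape, using the 4.16 arm64 names:

    /* Stage-2 PTEs start out XN; the bit is cleared only once the
     * icache has been invalidated for the page. */
    static inline pte_t kvm_s2pte_mkexec(pte_t pte)
    {
            pte_val(pte) &= ~PTE_S2_XN;
            return pte;
    }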
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc arch/x86/include/asm/mshyperv.h
index b52af150cbd8e55c8b77b75624534b989d372ee2,1790002a2052d96a4686d8375dee88b49807fba5..25283f7eb299e06ab703c11f7a2f95c0425f6a95
@@@ -314,13 -315,21 +315,21 @@@ void hyperv_init(void)
  void hyperv_setup_mmu_ops(void);
  void hyper_alloc_mmu(void);
  void hyperv_report_panic(struct pt_regs *regs, long err);
 -bool hv_is_hypercall_page_setup(void);
 +bool hv_is_hyperv_initialized(void);
  void hyperv_cleanup(void);
+ void hyperv_reenlightenment_intr(struct pt_regs *regs);
+ void set_hv_tscchange_cb(void (*cb)(void));
+ void clear_hv_tscchange_cb(void);
+ void hyperv_stop_tsc_emulation(void);
  #else /* CONFIG_HYPERV */
  static inline void hyperv_init(void) {}
 -static inline bool hv_is_hypercall_page_setup(void) { return false; }
 +static inline bool hv_is_hyperv_initialized(void) { return false; }
  static inline void hyperv_cleanup(void) {}
  static inline void hyperv_setup_mmu_ops(void) {}
+ static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
+ static inline void clear_hv_tscchange_cb(void) {}
+ static inline void hyperv_stop_tsc_emulation(void) {};
  #endif /* CONFIG_HYPERV */
  
  #ifdef CONFIG_HYPERV_TSCPAGE
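
The new declarations form the reenlightenment interface behind the
"stable KVM clock when nesting on Hyper-V" item: on migration, Hyper-V
temporarily emulates the TSC and raises a reenlightenment interrupt; a
registered callback can refresh its timekeeping and then ask for the
emulation to stop. A hedged usage sketch (example_tsc_notifier and the
initcall are illustrative, not from the series):

    #include <linux/kernel.h>
    #include <linux/init.h>
    #include <asm/mshyperv.h>

    /* Called when Hyper-V starts emulating the TSC after a migration. */
    static void example_tsc_notifier(void)
    {
            pr_info("hyperv: TSC parameters changed, refreshing clocks\n");
            /* ...recompute clocksource state here, then: */
            hyperv_stop_tsc_emulation();
    }

    static int __init example_register(void)
    {
            set_hv_tscchange_cb(example_tsc_notifier);
            return 0;
    }
    late_initcall(example_register);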
diff --cc arch/x86/kvm/cpuid.c
index 13f5d4217e4f1c0ee27f13935b04725949aa30d2,20e491b94f44e4ba5bc78294b6a0b281c2b43c39..a0c5a69bc7c4a324078db14ad27753443d65aa85
@@@ -363,12 -371,9 +369,13 @@@ static inline int __do_cpuid_ent(struc
                F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
                F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
                F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
-               0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+               0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
+               F(TOPOEXT);
  
 +      /* cpuid 0x80000008.ebx */
 +      const u32 kvm_cpuid_8000_0008_ebx_x86_features =
 +              F(IBPB) | F(IBRS);
 +
        /* cpuid 0xC0000001.edx */
        const u32 kvm_cpuid_C000_0001_edx_x86_features =
                F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
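
These masks use cpuid.c's F() macro, which expands to
bit(X86_FEATURE_##name); a leaf is built by ANDing the guest-visible
register against the mask and against the host's real capabilities. A
hedged sketch of where the new 0x80000008.EBX mask is consumed (the
surrounding code in __do_cpuid_ent() also ORs in IBPB/IBRS when the
host has them without the corresponding CPUID bit):

    case 0x80000008:
            /* Expose only the speculation-control bits that are both
             * whitelisted above and present on the host. */
            entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
            cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
            break;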
diff --cc arch/x86/kvm/svm.c
index 4e3c7953052634a040bd4aff6d881d03336bf18a,1bf20e9160bd52b8e3bdc42d0acee6718c8037a2..b3e488a748281aa5f3d319861ae2ab4bfca3e65c
@@@ -533,7 -573,9 +577,10 @@@ struct svm_cpu_data 
        struct kvm_ldttss_desc *tss_desc;
  
        struct page *save_area;
 +      struct vmcb *current_vmcb;
+       /* index = sev_asid, value = vmcb pointer */
+       struct vmcb **sev_vmcbs;
  };
  
  static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
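
The new sev_vmcbs table gives each physical CPU one slot per SEV ASID,
recording the VMCB that last ran with that ASID there; if a different
VMCB later arrives with the same ASID, the TLB must be flushed, since
ASIDs are recycled across SEV guests. A sketch of how the table might
be sized at per-CPU setup, assuming max_sev_asid is read from CPUID
0x8000001F as elsewhere in the series:

    /* One slot per SEV ASID; slot 0 is unused by SEV guests. */
    if (svm_sev_enabled()) {
            sd->sev_vmcbs = kcalloc(max_sev_asid + 1,
                                    sizeof(struct vmcb *), GFP_KERNEL);
            if (!sd->sev_vmcbs)
                    return -ENOMEM;
    }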
diff --cc arch/x86/kvm/vmx.c
index bee4c49f6dd0849de280e00fac7ab6777e484bbb,9973a301364e0e0c8aa7d8be23f0c0d8e69ac3c7..f427723dc7db34fab153b4faecbbb767b48f7e06
@@@ -903,18 -864,25 +869,22 @@@ static const unsigned short vmcs_field_
  
  static inline short vmcs_field_to_offset(unsigned long field)
  {
 +      const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
 +      unsigned short offset;
+       unsigned index;
+       if (field >> 15)
+               return -ENOENT;
  
-       BUILD_BUG_ON(size > SHRT_MAX);
-       if (field >= size)
+       index = ROL16(field, 6);
 -      if (index >= ARRAY_SIZE(vmcs_field_to_offset_table))
++      if (index >= size)
                return -ENOENT;
  
-       field = array_index_nospec(field, size);
-       offset = vmcs_field_to_offset_table[field];
 -      /*
 -       * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
 -       * generic mechanism.
 -       */
 -      asm("lfence");
 -
 -      if (vmcs_field_to_offset_table[index] == 0)
++      index = array_index_nospec(index, size);
++      offset = vmcs_field_to_offset_table[index];
 +      if (offset == 0)
                return -ENOENT;
 -
 -      return vmcs_field_to_offset_table[index];
 +      return offset;
  }
  
  static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
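
The resolution above drops one parent's open-coded lfence in favor of
the other's array_index_nospec(): after the bounds check, the index is
clamped so a mispredicted branch cannot read past the table (the
Spectre v1 / CVE-2017-5753 pattern). The generic shape of the pattern,
as a standalone sketch:

    #include <linux/nospec.h>

    /* Bounds-check, then sanitize: under speculation idx becomes 0
     * instead of indexing past the end of the table. */
    static int bounded_lookup(const u16 *table, size_t size,
                              unsigned int idx)
    {
            if (idx >= size)
                    return -ENOENT;
            idx = array_index_nospec(idx, size);
            return table[idx];
    }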
@@@ -10206,69 -10049,55 +10211,83 @@@ static inline bool nested_vmx_prepare_m
        struct page *page;
        unsigned long *msr_bitmap_l1;
        unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
 +      /*
 +       * pred_cmd & spec_ctrl are trying to verify two things:
 +       *
 +       * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
 +       *    ensures that we do not accidentally generate an L02 MSR bitmap
 +       *    from the L12 MSR bitmap that is too permissive.
 +       * 2. That L1 or L2s have actually used the MSR. This avoids
 +       *    unnecessarily merging of the bitmap if the MSR is unused. This
 +       *    works properly because we only update the L01 MSR bitmap lazily.
 +       *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
 +       *    updated to reflect this when L1 (or its L2s) actually write to
 +       *    the MSR.
 +       */
 +      bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
 +      bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
  
 -      /* This shortcut is ok because we support only x2APIC MSRs so far. */
 -      if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
+       /* Nothing to do if the MSR bitmap is not in use.  */
+       if (!cpu_has_vmx_msr_bitmap() ||
+           !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
+               return false;
 +      if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
 +          !pred_cmd && !spec_ctrl)
                return false;
  
        page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
        if (is_error_page(page))
                return false;
-       msr_bitmap_l1 = (unsigned long *)kmap(page);
  
-       memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+       msr_bitmap_l1 = (unsigned long *)kmap(page);
+       if (nested_cpu_has_apic_reg_virt(vmcs12)) {
+               /*
+                * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it
+                * just lets the processor take the value from the virtual-APIC page;
+                * take those 256 bits directly from the L1 bitmap.
+                */
+               for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+                       unsigned word = msr / BITS_PER_LONG;
+                       msr_bitmap_l0[word] = msr_bitmap_l1[word];
+                       msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
+               }
+       } else {
+               for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+                       unsigned word = msr / BITS_PER_LONG;
+                       msr_bitmap_l0[word] = ~0;
+                       msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
+               }
+       }
  
-       if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
-               if (nested_cpu_has_apic_reg_virt(vmcs12))
-                       for (msr = 0x800; msr <= 0x8ff; msr++)
-                               nested_vmx_disable_intercept_for_msr(
-                                       msr_bitmap_l1, msr_bitmap_l0,
-                                       msr, MSR_TYPE_R);
+       nested_vmx_disable_intercept_for_msr(
+               msr_bitmap_l1, msr_bitmap_l0,
+               X2APIC_MSR(APIC_TASKPRI),
+               MSR_TYPE_W);
  
+       if (nested_cpu_has_vid(vmcs12)) {
                nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap_l1, msr_bitmap_l0,
-                               APIC_BASE_MSR + (APIC_TASKPRI >> 4),
-                               MSR_TYPE_R | MSR_TYPE_W);
-               if (nested_cpu_has_vid(vmcs12)) {
-                       nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap_l1, msr_bitmap_l0,
-                               APIC_BASE_MSR + (APIC_EOI >> 4),
-                               MSR_TYPE_W);
-                       nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap_l1, msr_bitmap_l0,
-                               APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
-                               MSR_TYPE_W);
-               }
+                       msr_bitmap_l1, msr_bitmap_l0,
+                       X2APIC_MSR(APIC_EOI),
+                       MSR_TYPE_W);
+               nested_vmx_disable_intercept_for_msr(
+                       msr_bitmap_l1, msr_bitmap_l0,
+                       X2APIC_MSR(APIC_SELF_IPI),
+                       MSR_TYPE_W);
        }
 +
 +      if (spec_ctrl)
 +              nested_vmx_disable_intercept_for_msr(
 +                                      msr_bitmap_l1, msr_bitmap_l0,
 +                                      MSR_IA32_SPEC_CTRL,
 +                                      MSR_TYPE_R | MSR_TYPE_W);
 +
 +      if (pred_cmd)
 +              nested_vmx_disable_intercept_for_msr(
 +                                      msr_bitmap_l1, msr_bitmap_l0,
 +                                      MSR_IA32_PRED_CMD,
 +                                      MSR_TYPE_W);
 +
        kunmap(page);
        kvm_release_page_clean(page);
  
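The X2APIC_MSR() helper used in this resolution replaces the open-coded
APIC_BASE_MSR + (reg >> 4) arithmetic on the removed lines: each
16-byte xAPIC register maps to one x2APIC MSR in the 0x800-0x8ff range.
For reference, a definition matching this usage (in the series it lives
in arch/x86/kvm/lapic.h):

    /* 16-byte xAPIC MMIO offset -> x2APIC MSR index (0x800 + reg/16) */
    #define X2APIC_MSR(r)   (APIC_BASE_MSR + ((r) >> 4))
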
Simple merge
Simple merge
Simple merge
Simple merge