Merge branch 'kvm-amd-fixes' into HEAD
author Paolo Bonzini <pbonzini@redhat.com>
Wed, 13 May 2020 16:14:05 +0000 (12:14 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 13 May 2020 16:14:05 +0000 (12:14 -0400)
16 files changed:
arch/arm64/kvm/guest.c
arch/powerpc/kvm/powerpc.c
arch/s390/kvm/kvm-s390.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/hyperv.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/lib/kvm_util.c
virt/kvm/kvm_main.c

diff --combined arch/arm64/kvm/guest.c
index 8417b200bec9461e7aa0b90c82c30a91240aa5d5,50a279d3ddd783ad751fefa4790b26aeb4adcfdb..863a0d158fb838ddae042d0aad9a926f91a2160f
  
  #include "trace.h"
  
 -#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM }
 -#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
 -
  struct kvm_stats_debugfs_item debugfs_entries[] = {
 -      VCPU_STAT(halt_successful_poll),
 -      VCPU_STAT(halt_attempted_poll),
 -      VCPU_STAT(halt_poll_invalid),
 -      VCPU_STAT(halt_wakeup),
 -      VCPU_STAT(hvc_exit_stat),
 -      VCPU_STAT(wfe_exit_stat),
 -      VCPU_STAT(wfi_exit_stat),
 -      VCPU_STAT(mmio_exit_user),
 -      VCPU_STAT(mmio_exit_kernel),
 -      VCPU_STAT(exits),
 +      VCPU_STAT("halt_successful_poll", halt_successful_poll),
 +      VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
 +      VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
 +      VCPU_STAT("halt_wakeup", halt_wakeup),
 +      VCPU_STAT("hvc_exit_stat", hvc_exit_stat),
 +      VCPU_STAT("wfe_exit_stat", wfe_exit_stat),
 +      VCPU_STAT("wfi_exit_stat", wfi_exit_stat),
 +      VCPU_STAT("mmio_exit_user", mmio_exit_user),
 +      VCPU_STAT("mmio_exit_kernel", mmio_exit_kernel),
 +      VCPU_STAT("exits", exits),
        { NULL }
  };
  
@@@ -197,6 -200,13 +197,13 @@@ static int set_core_reg(struct kvm_vcp
        }
  
        memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+       if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
+               int i;
+               for (i = 0; i < 16; i++)
+                       *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i);
+       }
  out:
        return err;
  }
diff --combined arch/powerpc/kvm/powerpc.c
index 7e24691e138ac6ca85d4fbb731a145b35edeab98,ad2f172c26a6c61100c7fc104fbbc75ae5177042..052614e9d4689ba2025543c601f96360de4a5df0
@@@ -521,6 -521,7 +521,7 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_IMMEDIATE_EXIT:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
@@@ -1764,9 -1765,8 +1765,9 @@@ int kvm_vcpu_ioctl_set_one_reg(struct k
        return r;
  }
  
 -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
  {
 +      struct kvm_run *run = vcpu->run;
        int r;
  
        vcpu_load(vcpu);
diff --combined arch/s390/kvm/kvm-s390.c
index 75471b646fd71b48a2c7f8da32dfc48cef25d44c,d05bb040fd427c1b25e1c9693930aa2ec01f0412..389ff1b7cd43f63f260c5e2d932a00765bef186f
  #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))
  
 -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 -
  struct kvm_stats_debugfs_item debugfs_entries[] = {
 -      { "userspace_handled", VCPU_STAT(exit_userspace) },
 -      { "exit_null", VCPU_STAT(exit_null) },
 -      { "exit_validity", VCPU_STAT(exit_validity) },
 -      { "exit_stop_request", VCPU_STAT(exit_stop_request) },
 -      { "exit_external_request", VCPU_STAT(exit_external_request) },
 -      { "exit_io_request", VCPU_STAT(exit_io_request) },
 -      { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
 -      { "exit_instruction", VCPU_STAT(exit_instruction) },
 -      { "exit_pei", VCPU_STAT(exit_pei) },
 -      { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
 -      { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
 -      { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
 -      { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
 -      { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
 -      { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
 -      { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
 -      { "halt_wakeup", VCPU_STAT(halt_wakeup) },
 -      { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
 -      { "instruction_lctl", VCPU_STAT(instruction_lctl) },
 -      { "instruction_stctl", VCPU_STAT(instruction_stctl) },
 -      { "instruction_stctg", VCPU_STAT(instruction_stctg) },
 -      { "deliver_ckc", VCPU_STAT(deliver_ckc) },
 -      { "deliver_cputm", VCPU_STAT(deliver_cputm) },
 -      { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
 -      { "deliver_external_call", VCPU_STAT(deliver_external_call) },
 -      { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
 -      { "deliver_virtio", VCPU_STAT(deliver_virtio) },
 -      { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
 -      { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
 -      { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
 -      { "deliver_program", VCPU_STAT(deliver_program) },
 -      { "deliver_io", VCPU_STAT(deliver_io) },
 -      { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
 -      { "exit_wait_state", VCPU_STAT(exit_wait_state) },
 -      { "inject_ckc", VCPU_STAT(inject_ckc) },
 -      { "inject_cputm", VCPU_STAT(inject_cputm) },
 -      { "inject_external_call", VCPU_STAT(inject_external_call) },
 -      { "inject_float_mchk", VM_STAT(inject_float_mchk) },
 -      { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
 -      { "inject_io", VM_STAT(inject_io) },
 -      { "inject_mchk", VCPU_STAT(inject_mchk) },
 -      { "inject_pfault_done", VM_STAT(inject_pfault_done) },
 -      { "inject_program", VCPU_STAT(inject_program) },
 -      { "inject_restart", VCPU_STAT(inject_restart) },
 -      { "inject_service_signal", VM_STAT(inject_service_signal) },
 -      { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
 -      { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
 -      { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
 -      { "inject_virtio", VM_STAT(inject_virtio) },
 -      { "instruction_epsw", VCPU_STAT(instruction_epsw) },
 -      { "instruction_gs", VCPU_STAT(instruction_gs) },
 -      { "instruction_io_other", VCPU_STAT(instruction_io_other) },
 -      { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
 -      { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
 -      { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
 -      { "instruction_ptff", VCPU_STAT(instruction_ptff) },
 -      { "instruction_stidp", VCPU_STAT(instruction_stidp) },
 -      { "instruction_sck", VCPU_STAT(instruction_sck) },
 -      { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
 -      { "instruction_spx", VCPU_STAT(instruction_spx) },
 -      { "instruction_stpx", VCPU_STAT(instruction_stpx) },
 -      { "instruction_stap", VCPU_STAT(instruction_stap) },
 -      { "instruction_iske", VCPU_STAT(instruction_iske) },
 -      { "instruction_ri", VCPU_STAT(instruction_ri) },
 -      { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
 -      { "instruction_sske", VCPU_STAT(instruction_sske) },
 -      { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
 -      { "instruction_essa", VCPU_STAT(instruction_essa) },
 -      { "instruction_stsi", VCPU_STAT(instruction_stsi) },
 -      { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 -      { "instruction_tb", VCPU_STAT(instruction_tb) },
 -      { "instruction_tpi", VCPU_STAT(instruction_tpi) },
 -      { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 -      { "instruction_tsch", VCPU_STAT(instruction_tsch) },
 -      { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 -      { "instruction_sie", VCPU_STAT(instruction_sie) },
 -      { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 -      { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 -      { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 -      { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 -      { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 -      { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 -      { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 -      { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 -      { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 -      { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 -      { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 -      { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 -      { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 -      { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 -      { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 -      { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 -      { "instruction_diag_10", VCPU_STAT(diagnose_10) },
 -      { "instruction_diag_44", VCPU_STAT(diagnose_44) },
 -      { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
 -      { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
 -      { "instruction_diag_258", VCPU_STAT(diagnose_258) },
 -      { "instruction_diag_308", VCPU_STAT(diagnose_308) },
 -      { "instruction_diag_500", VCPU_STAT(diagnose_500) },
 -      { "instruction_diag_other", VCPU_STAT(diagnose_other) },
 +      VCPU_STAT("userspace_handled", exit_userspace),
 +      VCPU_STAT("exit_null", exit_null),
 +      VCPU_STAT("exit_validity", exit_validity),
 +      VCPU_STAT("exit_stop_request", exit_stop_request),
 +      VCPU_STAT("exit_external_request", exit_external_request),
 +      VCPU_STAT("exit_io_request", exit_io_request),
 +      VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
 +      VCPU_STAT("exit_instruction", exit_instruction),
 +      VCPU_STAT("exit_pei", exit_pei),
 +      VCPU_STAT("exit_program_interruption", exit_program_interruption),
 +      VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
 +      VCPU_STAT("exit_operation_exception", exit_operation_exception),
 +      VCPU_STAT("halt_successful_poll", halt_successful_poll),
 +      VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
 +      VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
 +      VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
 +      VCPU_STAT("halt_wakeup", halt_wakeup),
 +      VCPU_STAT("instruction_lctlg", instruction_lctlg),
 +      VCPU_STAT("instruction_lctl", instruction_lctl),
 +      VCPU_STAT("instruction_stctl", instruction_stctl),
 +      VCPU_STAT("instruction_stctg", instruction_stctg),
 +      VCPU_STAT("deliver_ckc", deliver_ckc),
 +      VCPU_STAT("deliver_cputm", deliver_cputm),
 +      VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
 +      VCPU_STAT("deliver_external_call", deliver_external_call),
 +      VCPU_STAT("deliver_service_signal", deliver_service_signal),
 +      VCPU_STAT("deliver_virtio", deliver_virtio),
 +      VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
 +      VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
 +      VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
 +      VCPU_STAT("deliver_program", deliver_program),
 +      VCPU_STAT("deliver_io", deliver_io),
 +      VCPU_STAT("deliver_machine_check", deliver_machine_check),
 +      VCPU_STAT("exit_wait_state", exit_wait_state),
 +      VCPU_STAT("inject_ckc", inject_ckc),
 +      VCPU_STAT("inject_cputm", inject_cputm),
 +      VCPU_STAT("inject_external_call", inject_external_call),
 +      VM_STAT("inject_float_mchk", inject_float_mchk),
 +      VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
 +      VM_STAT("inject_io", inject_io),
 +      VCPU_STAT("inject_mchk", inject_mchk),
 +      VM_STAT("inject_pfault_done", inject_pfault_done),
 +      VCPU_STAT("inject_program", inject_program),
 +      VCPU_STAT("inject_restart", inject_restart),
 +      VM_STAT("inject_service_signal", inject_service_signal),
 +      VCPU_STAT("inject_set_prefix", inject_set_prefix),
 +      VCPU_STAT("inject_stop_signal", inject_stop_signal),
 +      VCPU_STAT("inject_pfault_init", inject_pfault_init),
 +      VM_STAT("inject_virtio", inject_virtio),
 +      VCPU_STAT("instruction_epsw", instruction_epsw),
 +      VCPU_STAT("instruction_gs", instruction_gs),
 +      VCPU_STAT("instruction_io_other", instruction_io_other),
 +      VCPU_STAT("instruction_lpsw", instruction_lpsw),
 +      VCPU_STAT("instruction_lpswe", instruction_lpswe),
 +      VCPU_STAT("instruction_pfmf", instruction_pfmf),
 +      VCPU_STAT("instruction_ptff", instruction_ptff),
 +      VCPU_STAT("instruction_stidp", instruction_stidp),
 +      VCPU_STAT("instruction_sck", instruction_sck),
 +      VCPU_STAT("instruction_sckpf", instruction_sckpf),
 +      VCPU_STAT("instruction_spx", instruction_spx),
 +      VCPU_STAT("instruction_stpx", instruction_stpx),
 +      VCPU_STAT("instruction_stap", instruction_stap),
 +      VCPU_STAT("instruction_iske", instruction_iske),
 +      VCPU_STAT("instruction_ri", instruction_ri),
 +      VCPU_STAT("instruction_rrbe", instruction_rrbe),
 +      VCPU_STAT("instruction_sske", instruction_sske),
 +      VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
 +      VCPU_STAT("instruction_essa", instruction_essa),
 +      VCPU_STAT("instruction_stsi", instruction_stsi),
 +      VCPU_STAT("instruction_stfl", instruction_stfl),
 +      VCPU_STAT("instruction_tb", instruction_tb),
 +      VCPU_STAT("instruction_tpi", instruction_tpi),
 +      VCPU_STAT("instruction_tprot", instruction_tprot),
 +      VCPU_STAT("instruction_tsch", instruction_tsch),
 +      VCPU_STAT("instruction_sthyi", instruction_sthyi),
 +      VCPU_STAT("instruction_sie", instruction_sie),
 +      VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
 +      VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
 +      VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
 +      VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
 +      VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
 +      VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
 +      VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
 +      VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
 +      VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
 +      VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
 +      VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
 +      VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
 +      VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
 +      VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
 +      VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
 +      VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
 +      VCPU_STAT("instruction_diag_10", diagnose_10),
 +      VCPU_STAT("instruction_diag_44", diagnose_44),
 +      VCPU_STAT("instruction_diag_9c", diagnose_9c),
 +      VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
 +      VCPU_STAT("instruction_diag_258", diagnose_258),
 +      VCPU_STAT("instruction_diag_308", diagnose_308),
 +      VCPU_STAT("instruction_diag_500", diagnose_500),
 +      VCPU_STAT("instruction_diag_other", diagnose_other),
        { NULL }
  };
  
@@@ -542,6 -545,7 +542,7 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
        case KVM_CAP_S390_VCPU_RESETS:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_S390_HPAGE_1M:
@@@ -4333,9 -4337,8 +4334,9 @@@ static void store_regs(struct kvm_vcpu 
                store_regs_fmt2(vcpu, kvm_run);
  }
  
 -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
  {
 +      struct kvm_run *kvm_run = vcpu->run;
        int rc;
  
        if (kvm_run->immediate_exit)
diff --combined arch/x86/include/asm/kvm_host.h
index a239a297be33177a5bc1d27c6ff72a97135a45dc,0a6b35353fc794c7ef6dcb4fbc38915221f1ed79..b3a5da27c2a5adfcf92d22b7ad328f5aab777f03
@@@ -83,9 -83,6 +83,9 @@@
  #define KVM_REQ_GET_VMCS12_PAGES      KVM_ARCH_REQ(24)
  #define KVM_REQ_APICV_UPDATE \
        KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 +#define KVM_REQ_TLB_FLUSH_CURRENT     KVM_ARCH_REQ(26)
 +#define KVM_REQ_HV_TLB_FLUSH \
 +      KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
  
  #define CR0_RESERVED_BITS                                               \
        (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@@ -170,8 -167,6 +170,8 @@@ enum kvm_reg 
        VCPU_EXREG_CR3,
        VCPU_EXREG_RFLAGS,
        VCPU_EXREG_SEGMENTS,
 +      VCPU_EXREG_EXIT_INFO_1,
 +      VCPU_EXREG_EXIT_INFO_2,
  };
  
  enum {
@@@ -377,12 -372,12 +377,12 @@@ struct rsvd_bits_validate 
  };
  
  struct kvm_mmu_root_info {
 -      gpa_t cr3;
 +      gpa_t pgd;
        hpa_t hpa;
  };
  
  #define KVM_MMU_ROOT_INFO_INVALID \
 -      ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
 +      ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })
  
  #define KVM_MMU_NUM_PREV_ROOTS 3
  
@@@ -408,7 -403,7 +408,7 @@@ struct kvm_mmu 
        void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                           u64 *spte, const void *pte);
        hpa_t root_hpa;
 -      gpa_t root_cr3;
 +      gpa_t root_pgd;
        union kvm_mmu_role mmu_role;
        u8 root_level;
        u8 shadow_root_level;
@@@ -583,6 -578,7 +583,7 @@@ struct kvm_vcpu_arch 
        unsigned long cr4;
        unsigned long cr4_guest_owned_bits;
        unsigned long cr8;
+       u32 host_pkru;
        u32 pkru;
        u32 hflags;
        u64 efer;
@@@ -1098,16 -1094,13 +1099,14 @@@ struct kvm_x86_ops 
        void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
        void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
        void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
-       u64 (*get_dr6)(struct kvm_vcpu *vcpu);
-       void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
        void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
        void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
        void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
        unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
        void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
  
 -      void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
 +      void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
 +      void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
        int  (*tlb_remote_flush)(struct kvm *kvm);
        int  (*tlb_remote_flush_with_range)(struct kvm *kvm,
                        struct kvm_tlb_range *range);
         */
        void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
  
 -      void (*run)(struct kvm_vcpu *vcpu);
 +      /*
 +       * Flush any TLB entries created by the guest.  Like tlb_flush_gva(),
 +       * does not need to flush GPA->HPA mappings.
 +       */
 +      void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
 +
 +      enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu);
        int (*handle_exit)(struct kvm_vcpu *vcpu,
                enum exit_fastpath_completion exit_fastpath);
        int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
        bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
        void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
        void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
 -      void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
 +      void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
        int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
        int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
        int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
                               struct x86_instruction_info *info,
                               enum x86_intercept_stage stage,
                               struct x86_exception *exception);
 -      void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu,
 -              enum exit_fastpath_completion *exit_fastpath);
 +      void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
  
 -      int (*check_nested_events)(struct kvm_vcpu *vcpu);
        void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
  
        void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
  
        /* pmu operations of sub-arch */
        const struct kvm_pmu_ops *pmu_ops;
 +      const struct kvm_x86_nested_ops *nested_ops;
  
        /*
         * Architecture specific hooks for vCPU blocking due to
  
        void (*setup_mce)(struct kvm_vcpu *vcpu);
  
 -      int (*get_nested_state)(struct kvm_vcpu *vcpu,
 -                              struct kvm_nested_state __user *user_kvm_nested_state,
 -                              unsigned user_data_size);
 -      int (*set_nested_state)(struct kvm_vcpu *vcpu,
 -                              struct kvm_nested_state __user *user_kvm_nested_state,
 -                              struct kvm_nested_state *kvm_state);
 -      bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
 -
        int (*smi_allowed)(struct kvm_vcpu *vcpu);
        int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
        int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
  
        int (*get_msr_feature)(struct kvm_msr_entry *entry);
  
 -      int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
 -                                 uint16_t *vmcs_version);
 -      uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
 -
        bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
  
        bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
        int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
  };
  
 +struct kvm_x86_nested_ops {
 +      int (*check_events)(struct kvm_vcpu *vcpu);
 +      int (*get_state)(struct kvm_vcpu *vcpu,
 +                       struct kvm_nested_state __user *user_kvm_nested_state,
 +                       unsigned user_data_size);
 +      int (*set_state)(struct kvm_vcpu *vcpu,
 +                       struct kvm_nested_state __user *user_kvm_nested_state,
 +                       struct kvm_nested_state *kvm_state);
 +      bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
 +
 +      int (*enable_evmcs)(struct kvm_vcpu *vcpu,
 +                          uint16_t *vmcs_version);
 +      uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
 +};
 +
  struct kvm_x86_init_ops {
        int (*cpu_has_kvm_support)(void);
        int (*disabled_by_bios)(void);
@@@ -1463,11 -1448,10 +1462,12 @@@ bool kvm_rdpmc(struct kvm_vcpu *vcpu)
  
  void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
  void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
  void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
  void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
  void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
 +bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
 +                                  struct x86_exception *fault);
  int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                            gfn_t gfn, void *data, int offset, int len,
                            u32 access);
@@@ -1525,11 -1509,8 +1525,11 @@@ int kvm_emulate_hypercall(struct kvm_vc
  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
                       void *insn, int insn_len);
  void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 +                          gva_t gva, hpa_t root_hpa);
  void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
 -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
 +void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
 +                   bool skip_mmu_sync);
  
  void kvm_configure_mmu(bool enable_tdp, int tdp_page_level);
  
@@@ -1682,8 -1663,8 +1682,8 @@@ void kvm_set_msi_irq(struct kvm *kvm, s
  static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
  {
        /* We can only post Fixed and LowPrio IRQs */
-       return (irq->delivery_mode == dest_Fixed ||
-               irq->delivery_mode == dest_LowestPrio);
+       return (irq->delivery_mode == APIC_DM_FIXED ||
+               irq->delivery_mode == APIC_DM_LOWEST);
  }
  
  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
diff --combined arch/x86/kvm/hyperv.c
index 2f96ff9e60ee0d9a61399aee588cb8c41f29ef8c,54d4b98b49e182ac5abd3910ac7a97ad60c251f1..f9d3b919823c344ecb531cc3c648bebb94f35f3a
@@@ -1425,8 -1425,9 +1425,8 @@@ static u64 kvm_hv_flush_tlb(struct kvm_
         * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
         * analyze it here, flush TLB regardless of the specified address space.
         */
 -      kvm_make_vcpus_request_mask(kvm,
 -                                  KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
 +      kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH,
-                                   vcpu_mask, &hv_vcpu->tlb_flush);
+                                   NULL, vcpu_mask, &hv_vcpu->tlb_flush);
  
  ret_success:
        /* We always do full TLB flush, set rep_done = rep_cnt. */
@@@ -1799,8 -1800,8 +1799,8 @@@ int kvm_vcpu_ioctl_get_hv_cpuid(struct 
        };
        int i, nent = ARRAY_SIZE(cpuid_entries);
  
 -      if (kvm_x86_ops.nested_get_evmcs_version)
 -              evmcs_ver = kvm_x86_ops.nested_get_evmcs_version(vcpu);
 +      if (kvm_x86_ops.nested_ops->get_evmcs_version)
 +              evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu);
  
        /* Skip NESTED_FEATURES if eVMCS is not supported */
        if (!evmcs_ver)
diff --combined arch/x86/kvm/svm/nested.c
index a7c3b3030e590d1c58bd2ccfd47711018774f3e5,9a2a62e5afebe9e17c099bbe04e204135c1c2ff6..1429f506fe9eca16cc2ab24175a4a5f43bb947c2
@@@ -19,6 -19,7 +19,7 @@@
  #include <linux/kernel.h>
  
  #include <asm/msr-index.h>
+ #include <asm/debugreg.h>
  
  #include "kvm_emulate.h"
  #include "trace.h"
@@@ -207,10 -208,6 +208,10 @@@ static bool nested_vmcb_checks(struct v
        if ((vmcb->save.efer & EFER_SVME) == 0)
                return false;
  
 +      if (((vmcb->save.cr0 & X86_CR0_CD) == 0) &&
 +          (vmcb->save.cr0 & X86_CR0_NW))
 +              return false;
 +
        if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
                return false;
  
@@@ -271,7 -268,7 +272,7 @@@ void enter_svm_guest_mode(struct vcpu_s
        svm->vmcb->save.rsp = nested_vmcb->save.rsp;
        svm->vmcb->save.rip = nested_vmcb->save.rip;
        svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
-       svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+       svm->vcpu.arch.dr6  = nested_vmcb->save.dr6;
        svm->vmcb->save.cpl = nested_vmcb->save.cpl;
  
        svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
        svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
        svm->nested.intercept            = nested_vmcb->control.intercept;
  
 -      svm_flush_tlb(&svm->vcpu, true);
 +      svm_flush_tlb(&svm->vcpu);
        svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
        if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
                svm->vcpu.arch.hflags |= HF_VINTR_MASK;
@@@ -345,12 -342,8 +346,12 @@@ int nested_svm_vmrun(struct vcpu_svm *s
        struct kvm_host_map map;
        u64 vmcb_gpa;
  
 -      vmcb_gpa = svm->vmcb->save.rax;
 +      if (is_smm(&svm->vcpu)) {
 +              kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 +              return 1;
 +      }
  
 +      vmcb_gpa = svm->vmcb->save.rax;
        ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
        if (ret == -EINVAL) {
                kvm_inject_gp(&svm->vcpu, 0);
@@@ -490,7 -483,7 +491,7 @@@ int nested_svm_vmexit(struct vcpu_svm *
        nested_vmcb->save.rsp    = vmcb->save.rsp;
        nested_vmcb->save.rax    = vmcb->save.rax;
        nested_vmcb->save.dr7    = vmcb->save.dr7;
-       nested_vmcb->save.dr6    = vmcb->save.dr6;
+       nested_vmcb->save.dr6    = svm->vcpu.arch.dr6;
        nested_vmcb->save.cpl    = vmcb->save.cpl;
  
        nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
@@@ -614,26 -607,45 +615,45 @@@ static int nested_svm_exit_handled_msr(
  /* DB exceptions for our internal use must not cause vmexit */
  static int nested_svm_intercept_db(struct vcpu_svm *svm)
  {
-       unsigned long dr6;
+       unsigned long dr6 = svm->vmcb->save.dr6;
+       /* Always catch it and pass it to userspace if debugging.  */
+       if (svm->vcpu.guest_debug &
+           (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+               return NESTED_EXIT_HOST;
  
        /* if we're not singlestepping, it's not ours */
        if (!svm->nmi_singlestep)
-               return NESTED_EXIT_DONE;
+               goto reflected_db;
  
        /* if it's not a singlestep exception, it's not ours */
-       if (kvm_get_dr(&svm->vcpu, 6, &dr6))
-               return NESTED_EXIT_DONE;
        if (!(dr6 & DR6_BS))
-               return NESTED_EXIT_DONE;
+               goto reflected_db;
  
        /* if the guest is singlestepping, it should get the vmexit */
        if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
                disable_nmi_singlestep(svm);
-               return NESTED_EXIT_DONE;
+               goto reflected_db;
        }
  
        /* it's ours, the nested hypervisor must not see this one */
        return NESTED_EXIT_HOST;
+ reflected_db:
+       /*
+        * Synchronize guest DR6 here just like in kvm_deliver_exception_payload;
+        * it will be moved into the nested VMCB by nested_svm_vmexit.  Once
+        * exceptions are moved to svm_check_nested_events, all this stuff
+        * will just go away and we can just return NESTED_EXIT_HOST
+        * unconditionally.  db_interception will queue the exception, which
+        * will be processed by svm_check_nested_events if a nested vmexit is
+        * required, and we will just use kvm_deliver_exception_payload to copy
+        * the payload to DR6 before vmexit.
+        */
+       WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT);
+       svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM);
+       svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1;
+       return NESTED_EXIT_DONE;
  }
  
  static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
@@@ -690,6 -702,9 +710,9 @@@ static int nested_svm_intercept(struct 
                if (svm->nested.intercept_exceptions & excp_bits) {
                        if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
                                vmexit = nested_svm_intercept_db(svm);
+                       else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR &&
+                                svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP)
+                               vmexit = NESTED_EXIT_HOST;
                        else
                                vmexit = NESTED_EXIT_DONE;
                }
@@@ -788,7 -803,7 +811,7 @@@ static bool nested_exit_on_intr(struct 
        return (svm->nested.intercept & 1ULL);
  }
  
 -int svm_check_nested_events(struct kvm_vcpu *vcpu)
 +static int svm_check_nested_events(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
        bool block_nested_events =
@@@ -829,7 -844,3 +852,7 @@@ int nested_svm_exit_special(struct vcpu
  
        return NESTED_EXIT_CONTINUE;
  }
 +
 +struct kvm_x86_nested_ops svm_nested_ops = {
 +      .check_events = svm_check_nested_events,
 +};
diff --combined arch/x86/kvm/svm/svm.c
index c86f7278509bbdcf257d18cd8231aa79780a4c40,a862c768fd542695614d335b033dd18d1c3f86f9..b627564e41f9e6595c68592d9311b2d59bb116f3
@@@ -33,7 -33,6 +33,7 @@@
  #include <asm/debugreg.h>
  #include <asm/kvm_para.h>
  #include <asm/irq_remapping.h>
 +#include <asm/mce.h>
  #include <asm/spec-ctrl.h>
  #include <asm/cpu_device_id.h>
  
@@@ -1604,7 -1603,7 +1604,7 @@@ int svm_set_cr4(struct kvm_vcpu *vcpu, 
                return 1;
  
        if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
 -              svm_flush_tlb(vcpu, true);
 +              svm_flush_tlb(vcpu);
  
        vcpu->arch.cr4 = cr4;
        if (!npt_enabled)
@@@ -1673,17 -1672,14 +1673,14 @@@ static void new_asid(struct vcpu_svm *s
        mark_dirty(svm->vmcb, VMCB_ASID);
  }
  
- static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
+ static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
  {
-       return to_svm(vcpu)->vmcb->save.dr6;
- }
- static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
- {
-       struct vcpu_svm *svm = to_svm(vcpu);
+       struct vmcb *vmcb = svm->vmcb;
  
-       svm->vmcb->save.dr6 = value;
-       mark_dirty(svm->vmcb, VMCB_DR);
+       if (unlikely(value != vmcb->save.dr6)) {
+               vmcb->save.dr6 = value;
+               mark_dirty(vmcb, VMCB_DR);
+       }
  }
  
  static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
        get_debugreg(vcpu->arch.db[1], 1);
        get_debugreg(vcpu->arch.db[2], 2);
        get_debugreg(vcpu->arch.db[3], 3);
-       vcpu->arch.dr6 = svm_get_dr6(vcpu);
+       /*
+        * We cannot reset svm->vmcb->save.dr6 to DR6_FIXED_1|DR6_RTM here,
+        * because db_interception might need it.  We can do it before vmentry.
+        */
+       vcpu->arch.dr6 = svm->vmcb->save.dr6;
        vcpu->arch.dr7 = svm->vmcb->save.dr7;
        vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
        set_dr_intercepts(svm);
  }
@@@ -1740,7 -1739,8 +1740,8 @@@ static int db_interception(struct vcpu_
        if (!(svm->vcpu.guest_debug &
              (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
                !svm->nmi_singlestep) {
-               kvm_queue_exception(&svm->vcpu, DB_VECTOR);
+               u32 payload = (svm->vmcb->save.dr6 ^ DR6_RTM) & ~DR6_FIXED_1;
+               kvm_queue_exception_p(&svm->vcpu, DB_VECTOR, payload);
                return 1;
        }
  
        if (svm->vcpu.guest_debug &
            (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6;
+               kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7;
                kvm_run->debug.arch.pc =
                        svm->vmcb->save.cs.base + svm->vmcb->save.rip;
                kvm_run->debug.arch.exception = DB_VECTOR;
@@@ -1840,25 -1842,6 +1843,25 @@@ static bool is_erratum_383(void
        return true;
  }
  
 +/*
 + * Trigger machine check on the host. We assume all the MSRs are already set up
 + * by the CPU and that we still run on the same CPU as the MCE occurred on.
 + * We pass a fake environment to the machine check handler because we want
 + * the guest to be always treated like user space, no matter what context
 + * it used internally.
 + */
 +static void kvm_machine_check(void)
 +{
 +#if defined(CONFIG_X86_MCE)
 +      struct pt_regs regs = {
 +              .cs = 3, /* Fake ring 3 no matter what the guest ran on */
 +              .flags = X86_EFLAGS_IF,
 +      };
 +
 +      do_machine_check(&regs, 0);
 +#endif
 +}
 +
  static void svm_handle_mce(struct vcpu_svm *svm)
  {
        if (is_erratum_383()) {
         * On an #MC intercept the MCE handler is not called automatically in
         * the host. So do it by hand here.
         */
 -      asm volatile (
 -              "int $0x12\n");
 -      /* not sure if we ever come back to this point */
 -
 -      return;
 +      kvm_machine_check();
  }
  
  static int mc_interception(struct vcpu_svm *svm)
@@@ -3169,17 -3156,10 +3172,17 @@@ static int svm_set_identity_map_addr(st
        return 0;
  }
  
 -void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
 +void svm_flush_tlb(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
  
 +      /*
 +       * Flush only the current ASID even if the TLB flush was invoked via
 +       * kvm_flush_remote_tlbs().  Although flushing remote TLBs requires all
 +       * ASIDs to be flushed, KVM uses a single ASID for L1 and L2, and
 +       * unconditionally does a TLB flush on both nested VM-Enter and nested
 +       * VM-Exit (via kvm_mmu_reset_context()).
 +       */
        if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
                svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
        else
@@@ -3299,21 -3279,10 +3302,21 @@@ static void svm_cancel_injection(struc
        svm_complete_interrupts(svm);
  }
  
 +static enum exit_fastpath_completion svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 +{
 +      if (!is_guest_mode(vcpu) &&
 +          to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
 +          to_svm(vcpu)->vmcb->control.exit_info_1)
 +              return handle_fastpath_set_msr_irqoff(vcpu);
 +
 +      return EXIT_FASTPATH_NONE;
 +}
 +
  void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
  
 -static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 +static enum exit_fastpath_completion svm_vcpu_run(struct kvm_vcpu *vcpu)
  {
 +      enum exit_fastpath_completion exit_fastpath;
        struct vcpu_svm *svm = to_svm(vcpu);
  
        svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
         * again.
         */
        if (unlikely(svm->nested.exit_required))
 -              return;
 +              return EXIT_FASTPATH_NONE;
  
        /*
         * Disable singlestep if we're injecting an interrupt/exception.
  
        svm->vmcb->save.cr2 = vcpu->arch.cr2;
  
+       /*
+        * Run with all-zero DR6 unless needed, so that we can get the exact cause
+        * of a #DB.
+        */
+       if (unlikely(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
+               svm_set_dr6(svm, vcpu->arch.dr6);
+       else
+               svm_set_dr6(svm, DR6_FIXED_1 | DR6_RTM);
        clgi();
        kvm_load_guest_xsave_state(vcpu);
  
        stgi();
  
        /* Any pending NMI will happen here */
 +      exit_fastpath = svm_exit_handlers_fastpath(vcpu);
  
        if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
                kvm_after_interrupt(&svm->vcpu);
                svm_handle_mce(svm);
  
        mark_all_clean(svm->vmcb);
 +      return exit_fastpath;
  }
  
  static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
@@@ -3740,8 -3716,13 +3752,8 @@@ out
        return ret;
  }
  
 -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu,
 -      enum exit_fastpath_completion *exit_fastpath)
 +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
  {
 -      if (!is_guest_mode(vcpu) &&
 -          to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
 -          to_svm(vcpu)->vmcb->control.exit_info_1)
 -              *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
  }
  
  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
@@@ -3837,13 -3818,6 +3849,13 @@@ static bool svm_need_emulation_on_page_
        bool smap = cr4 & X86_CR4_SMAP;
        bool is_user = svm_get_cpl(vcpu) == 3;
  
 +      /*
 +       * If RIP is invalid, go ahead with emulation which will cause an
 +       * internal error exit.
 +       */
 +      if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
 +              return true;
 +
        /*
         * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
         *
@@@ -3902,9 -3876,9 +3914,9 @@@ static bool svm_apic_init_signal_blocke
        /*
         * TODO: Last condition latch INIT signals on vCPU when
         * vCPU is in guest-mode and vmcb12 defines intercept on INIT.
 -       * To properly emulate the INIT intercept, SVM should implement
 -       * kvm_x86_ops.check_nested_events() and call nested_svm_vmexit()
 -       * there if an INIT signal is pending.
 +       * To properly emulate the INIT intercept,
 +       * svm_check_nested_events() should call nested_svm_vmexit()
 +       * if an INIT signal is pending.
         */
        return !gif_set(svm) ||
                   (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
@@@ -3967,18 -3941,14 +3979,16 @@@ static struct kvm_x86_ops svm_x86_ops _
        .set_idt = svm_set_idt,
        .get_gdt = svm_get_gdt,
        .set_gdt = svm_set_gdt,
-       .get_dr6 = svm_get_dr6,
-       .set_dr6 = svm_set_dr6,
        .set_dr7 = svm_set_dr7,
        .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
        .cache_reg = svm_cache_reg,
        .get_rflags = svm_get_rflags,
        .set_rflags = svm_set_rflags,
  
 -      .tlb_flush = svm_flush_tlb,
 +      .tlb_flush_all = svm_flush_tlb,
 +      .tlb_flush_current = svm_flush_tlb,
        .tlb_flush_gva = svm_flush_tlb_gva,
 +      .tlb_flush_guest = svm_flush_tlb,
  
        .run = svm_vcpu_run,
        .handle_exit = handle_exit,
        .sched_in = svm_sched_in,
  
        .pmu_ops = &amd_pmu_ops,
 +      .nested_ops = &svm_nested_ops,
 +
        .deliver_posted_interrupt = svm_deliver_avic_intr,
        .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
        .update_pi_irte = svm_update_pi_irte,
        .mem_enc_reg_region = svm_register_enc_region,
        .mem_enc_unreg_region = svm_unregister_enc_region,
  
 -      .nested_enable_evmcs = NULL,
 -      .nested_get_evmcs_version = NULL,
 -
        .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
  
        .apic_init_signal_blocked = svm_apic_init_signal_blocked,
 -
 -      .check_nested_events = svm_check_nested_events,
  };
  
  static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --combined arch/x86/kvm/vmx/nested.c
index b516c24494e38a63f89e271cd4474ea33bbf5c3b,e44f33c82332505b057ae016cf0d5a5c0cba23d0..b644bbf85460d41d5c20d1f6c1eb24191ef7bf26
@@@ -307,7 -307,7 +307,7 @@@ static void vmx_switch_vmcs(struct kvm_
        vmx_sync_vmcs_host_state(vmx, prev);
        put_cpu();
  
 -      vmx_segment_cache_clear(vmx);
 +      vmx_register_cache_reset(vcpu);
  }
  
  /*
@@@ -328,19 -328,19 +328,19 @@@ static void nested_ept_inject_page_faul
  {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 -      u32 exit_reason;
 +      u32 vm_exit_reason;
        unsigned long exit_qualification = vcpu->arch.exit_qualification;
  
        if (vmx->nested.pml_full) {
 -              exit_reason = EXIT_REASON_PML_FULL;
 +              vm_exit_reason = EXIT_REASON_PML_FULL;
                vmx->nested.pml_full = false;
                exit_qualification &= INTR_INFO_UNBLOCK_NMI;
        } else if (fault->error_code & PFERR_RSVD_MASK)
 -              exit_reason = EXIT_REASON_EPT_MISCONFIG;
 +              vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
        else
 -              exit_reason = EXIT_REASON_EPT_VIOLATION;
 +              vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
  
 -      nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
 +      nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
        vmcs12->guest_physical_address = fault->address;
  }
  
@@@ -1073,48 -1073,6 +1073,48 @@@ static bool nested_cr3_valid(struct kvm
        return (val & invalid_mask) == 0;
  }
  
 +/*
 + * Returns true if the MMU needs to be sync'd on nested VM-Enter/VM-Exit.
 + * tl;dr: the MMU needs a sync if L0 is using shadow paging and L1 didn't
 + * enable VPID for L2 (implying it expects a TLB flush on VMX transitions).
 + * Here's why.
 + *
 + * If EPT is enabled by L0 a sync is never needed:
 + * - if it is disabled by L1, then L0 is not shadowing L1 or L2 PTEs, there
 + *   cannot be unsync'd SPTEs for either L1 or L2.
 + *
 + * - if it is also enabled by L1, then L0 doesn't need to sync on VM-Enter
 + *   as VM-Enter isn't required to invalidate guest-physical mappings
 + *   (irrespective of VPID), i.e. L1 can't rely on the (virtual) CPU to flush
 + *   stale guest-physical mappings for L2 from the TLB.  And as above, L0 isn't
 + *   shadowing L1 PTEs so there are no unsync'd SPTEs to sync on VM-Exit.
 + *
 + * If EPT is disabled by L0:
 + * - if VPID is enabled by L1 (for L2), the situation is similar to when L1
 + *   enables EPT: L0 doesn't need to sync as VM-Enter and VM-Exit aren't
 + *   required to invalidate linear mappings (EPT is disabled so there are
 + *   no combined or guest-physical mappings), i.e. L1 can't rely on the
 + *   (virtual) CPU to flush stale linear mappings for either L2 or itself (L1).
 + *
 + * - however if VPID is disabled by L1, then a sync is needed as L1 expects all
 + *   linear mappings (EPT is disabled so there are no combined or guest-physical
 + *   mappings) to be invalidated on both VM-Enter and VM-Exit.
 + *
 + * Note, this logic is subtly different than nested_has_guest_tlb_tag(), which
 + * additionally checks that L2 has been assigned a VPID (when EPT is disabled).
 + * Whether or not L2 has been assigned a VPID by L0 is irrelevant with respect
 + * to L1's expectations, e.g. L0 needs to invalidate hardware TLB entries if L2
 + * doesn't have a unique VPID to prevent reusing L1's entries (assuming L1 has
 + * been assigned a VPID), but L0 doesn't need to do a MMU sync because L1
 + * doesn't expect stale (virtual) TLB entries to be flushed, i.e. L1 doesn't
 + * know that L0 will flush the TLB and so L1 will do INVVPID as needed to flush
 + * stale TLB entries, at which point L0 will sync L2's MMU.
 + */
 +static bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu)
 +{
 +      return !enable_ept && !nested_cpu_has_vpid(get_vmcs12(vcpu));
 +}
 +
  /*
   * Load guest's/host's cr3 at nested entry/exit.  @nested_ept is true if we are
   * emulating VM-Entry into a guest with EPT enabled.  On failure, the expected
@@@ -1142,14 -1100,8 +1142,14 @@@ static int nested_vmx_load_cr3(struct k
                }
        }
  
 +      /*
 +       * Unconditionally skip the TLB flush on fast CR3 switch, all TLB
 +       * flushes are handled by nested_vmx_transition_tlb_flush().  See
 +       * nested_vmx_transition_mmu_sync for details on skipping the MMU sync.
 +       */
        if (!nested_ept)
 -              kvm_mmu_new_cr3(vcpu, cr3, false);
 +              kvm_mmu_new_pgd(vcpu, cr3, true,
 +                              !nested_vmx_transition_mmu_sync(vcpu));
  
        vcpu->arch.cr3 = cr3;
        kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
@@@ -1180,48 -1132,11 +1180,48 @@@ static bool nested_has_guest_tlb_tag(st
               (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
  }
  
 -static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
 +static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
 +                                          struct vmcs12 *vmcs12,
 +                                          bool is_vmenter)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
 -      return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
 +      /*
 +       * If VPID is disabled, linear and combined mappings are flushed on
 +       * VM-Enter/VM-Exit, and guest-physical mappings are valid only for
 +       * their associated EPTP.
 +       */
 +      if (!enable_vpid)
 +              return;
 +
 +      /*
 +       * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
 +       * for *all* contexts to be flushed on VM-Enter/VM-Exit.
 +       *
 +       * If VPID is enabled and used by vmcs12, but L2 does not have a unique
 +       * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
 +       * a VPID for L2, flush the current context as the effective ASID is
 +       * common to both L1 and L2.
 +       *
 +       * Defer the flush so that it runs after vmcs02.EPTP has been set by
 +       * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
 +       * redundant flushes further down the nested pipeline.
 +       *
 +       * If a TLB flush isn't required due to any of the above, and vpid12 is
 +       * changing then the new "virtual" VPID (vpid12) will reuse the same
 +       * "real" VPID (vpid02), and so needs to be sync'd.  There is no direct
 +       * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
 +       * all nested vCPUs.
 +       */
 +      if (!nested_cpu_has_vpid(vmcs12)) {
 +              kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 +      } else if (!nested_has_guest_tlb_tag(vcpu)) {
 +              kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 +      } else if (is_vmenter &&
 +                 vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
 +              vmx->nested.last_vpid = vmcs12->virtual_processor_id;
 +              vpid_sync_context(nested_get_vpid02(vcpu));
 +      }
  }
  
  static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
@@@ -1785,6 -1700,10 +1785,6 @@@ static int copy_enlightened_to_vmcs12(s
         * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
         * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
         * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
 -       * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0;
 -       * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1;
 -       * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2;
 -       * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3;
         * vmcs12->page_fault_error_code_mask =
         *              evmcs->page_fault_error_code_mask;
         * vmcs12->page_fault_error_code_match =
@@@ -1858,6 -1777,10 +1858,6 @@@ static int copy_vmcs12_to_enlightened(s
         * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
         * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
         * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
 -       * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0;
 -       * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1;
 -       * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2;
 -       * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3;
         * evmcs->tpr_threshold = vmcs12->tpr_threshold;
         * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
         * evmcs->exception_bitmap = vmcs12->exception_bitmap;
@@@ -2524,7 -2447,32 +2524,7 @@@ static int prepare_vmcs02(struct kvm_vc
        if (kvm_has_tsc_control)
                decache_tsc_multiplier(vmx);
  
 -      if (enable_vpid) {
 -              /*
 -               * There is no direct mapping between vpid02 and vpid12, the
 -               * vpid02 is per-vCPU for L0 and reused while the value of
 -               * vpid12 is changed w/ one invvpid during nested vmentry.
 -               * The vpid12 is allocated by L1 for L2, so it will not
 -               * influence global bitmap(for vpid01 and vpid02 allocation)
 -               * even if spawn a lot of nested vCPUs.
 -               */
 -              if (nested_cpu_has_vpid(vmcs12) && nested_has_guest_tlb_tag(vcpu)) {
 -                      if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
 -                              vmx->nested.last_vpid = vmcs12->virtual_processor_id;
 -                              __vmx_flush_tlb(vcpu, nested_get_vpid02(vcpu), false);
 -                      }
 -              } else {
 -                      /*
 -                       * If L1 use EPT, then L0 needs to execute INVEPT on
 -                       * EPTP02 instead of EPTP01. Therefore, delay TLB
 -                       * flush until vmcs02->eptp is fully updated by
 -                       * KVM_REQ_LOAD_MMU_PGD. Note that this assumes
 -                       * KVM_REQ_TLB_FLUSH is evaluated after
 -                       * KVM_REQ_LOAD_MMU_PGD in vcpu_enter_guest().
 -                       */
 -                      kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 -              }
 -      }
 +      nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
  
        if (nested_cpu_has_ept(vmcs12))
                nested_ept_init_mmu_context(vcpu);
@@@ -3250,9 -3198,6 +3250,9 @@@ enum nvmx_vmentry_status nested_vmx_ent
        u32 exit_reason = EXIT_REASON_INVALID_STATE;
        u32 exit_qual;
  
 +      if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
 +              kvm_vcpu_flush_tlb_current(vcpu);
 +
        evaluate_pending_interrupts = exec_controls_get(vmx) &
                (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
        if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
@@@ -3994,11 -3939,11 +3994,11 @@@ static void sync_vmcs02_to_vmcs12(struc
   * which already writes to vmcs12 directly.
   */
  static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 -                         u32 exit_reason, u32 exit_intr_info,
 +                         u32 vm_exit_reason, u32 exit_intr_info,
                           unsigned long exit_qualification)
  {
        /* update exit information fields: */
 -      vmcs12->vm_exit_reason = exit_reason;
 +      vmcs12->vm_exit_reason = vm_exit_reason;
        vmcs12->exit_qualification = exit_qualification;
        vmcs12->vm_exit_intr_info = exit_intr_info;
  
@@@ -4095,7 -4040,24 +4095,7 @@@ static void load_vmcs12_host_state(stru
        if (!enable_ept)
                vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
  
 -      /*
 -       * If vmcs01 doesn't use VPID, CPU flushes TLB on every
 -       * VMEntry/VMExit. Thus, no need to flush TLB.
 -       *
 -       * If vmcs12 doesn't use VPID, L1 expects TLB to be
 -       * flushed on every VMEntry/VMExit.
 -       *
 -       * Otherwise, we can preserve TLB entries as long as we are
 -       * able to tag L1 TLB entries differently than L2 TLB entries.
 -       *
 -       * If vmcs12 uses EPT, we need to execute this flush on EPTP01
 -       * and therefore we request the TLB flush to happen only after VMCS EPTP
 -       * has been set by KVM_REQ_LOAD_MMU_PGD.
 -       */
 -      if (enable_vpid &&
 -          (!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) {
 -              kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 -      }
 +      nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
  
        vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
        vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
@@@ -4242,7 -4204,7 +4242,7 @@@ static void nested_vmx_restore_host_sta
         * VMFail, like everything else we just need to ensure our
         * software model is up-to-date.
         */
 -      if (enable_ept)
 +      if (enable_ept && is_pae_paging(vcpu))
                ept_save_pdptrs(vcpu);
  
        kvm_mmu_reset_context(vcpu);
@@@ -4310,7 -4272,7 +4310,7 @@@ vmabort
   * and modify vmcs12 to make it see what it would expect to see there if
   * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
   */
 -void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 +void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
                       u32 exit_intr_info, unsigned long exit_qualification)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        /* trying to cancel vmlaunch/vmresume is a bug */
        WARN_ON_ONCE(vmx->nested.nested_run_pending);
  
 +      /* Service the TLB flush request for L2 before switching to L1. */
 +      if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
 +              kvm_vcpu_flush_tlb_current(vcpu);
 +
        leave_guest_mode(vcpu);
  
        if (nested_cpu_has_preemption_timer(vmcs12))
        if (likely(!vmx->fail)) {
                sync_vmcs02_to_vmcs12(vcpu, vmcs12);
  
 -              if (exit_reason != -1)
 -                      prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
 -                                     exit_qualification);
 +              if (vm_exit_reason != -1)
 +                      prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
 +                                     exit_intr_info, exit_qualification);
  
                /*
                 * Must happen outside of sync_vmcs02_to_vmcs12() as it will
        kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
        vmx->nested.pi_desc = NULL;
  
 -      /*
 -       * We are now running in L2, mmu_notifier will force to reload the
 -       * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1.
 -       */
 -      kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 +      if (vmx->nested.reload_vmcs01_apic_access_page) {
 +              vmx->nested.reload_vmcs01_apic_access_page = false;
 +              kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 +      }
  
 -      if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
 +      if ((vm_exit_reason != -1) &&
 +          (enable_shadow_vmcs || vmx->nested.hv_evmcs))
                vmx->nested.need_vmcs12_to_shadow_sync = true;
  
        /* in case we halted in L2 */
        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
  
        if (likely(!vmx->fail)) {
 -              if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
 +              if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
                    nested_exit_intr_ack_set(vcpu)) {
                        int irq = kvm_cpu_get_interrupt(vcpu);
                        WARN_ON(irq < 0);
                                INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
                }
  
 -              if (exit_reason != -1)
 +              if (vm_exit_reason != -1)
                        trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
                                                       vmcs12->exit_qualification,
                                                       vmcs12->idt_vectoring_info_field,
@@@ -4596,13 -4554,13 +4596,13 @@@ static int nested_vmx_get_vmptr(struct 
        gva_t gva;
        struct x86_exception e;
  
 -      if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 +      if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
                                vmcs_read32(VMX_INSTRUCTION_INFO), false,
                                sizeof(*vmpointer), &gva))
                return 1;
  
        if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
 -              kvm_inject_page_fault(vcpu, &e);
 +              kvm_inject_emulated_page_fault(vcpu, &e);
                return 1;
        }
  
@@@ -4861,7 -4819,7 +4861,7 @@@ static int handle_vmread(struct kvm_vcp
  {
        struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
                                                    : get_vmcs12(vcpu);
 -      unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
        u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct x86_exception e;
                        return 1;
                /* _system ok, nested_vmx_check_permission has verified cpl=0 */
                if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) {
 -                      kvm_inject_page_fault(vcpu, &e);
 +                      kvm_inject_emulated_page_fault(vcpu, &e);
                        return 1;
                }
        }
@@@ -4947,7 -4905,7 +4947,7 @@@ static int handle_vmwrite(struct kvm_vc
  {
        struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
                                                    : get_vmcs12(vcpu);
 -      unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
        u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct x86_exception e;
                                        instr_info, false, len, &gva))
                        return 1;
                if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) {
 -                      kvm_inject_page_fault(vcpu, &e);
 +                      kvm_inject_emulated_page_fault(vcpu, &e);
                        return 1;
                }
        }
@@@ -5132,7 -5090,7 +5132,7 @@@ static int handle_vmptrld(struct kvm_vc
  /* Emulate the VMPTRST instruction */
  static int handle_vmptrst(struct kvm_vcpu *vcpu)
  {
 -      unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION);
 +      unsigned long exit_qual = vmx_get_exit_qual(vcpu);
        u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
        struct x86_exception e;
        /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
        if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
                                        sizeof(gpa_t), &e)) {
 -              kvm_inject_page_fault(vcpu, &e);
 +              kvm_inject_emulated_page_fault(vcpu, &e);
                return 1;
        }
        return nested_vmx_succeed(vcpu);
  }
  
 +#define EPTP_PA_MASK   GENMASK_ULL(51, 12)
 +
 +static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
 +{
 +      return VALID_PAGE(root_hpa) &&
 +              ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
 +}
 +
  /* Emulate the INVEPT instruction */
  static int handle_invept(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 vmx_instruction_info, types;
 -      unsigned long type;
 +      unsigned long type, roots_to_free;
 +      struct kvm_mmu *mmu;
        gva_t gva;
        struct x86_exception e;
        struct {
                u64 eptp, gpa;
        } operand;
 +      int i;
  
        if (!(vmx->nested.msrs.secondary_ctls_high &
              SECONDARY_EXEC_ENABLE_EPT) ||
        /* According to the Intel VMX instruction reference, the memory
         * operand is read even if it isn't needed (e.g., for type==global)
         */
 -      if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 +      if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
                        vmx_instruction_info, false, sizeof(operand), &gva))
                return 1;
        if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
 -              kvm_inject_page_fault(vcpu, &e);
 +              kvm_inject_emulated_page_fault(vcpu, &e);
                return 1;
        }
  
 -      switch (type) {
 -      case VMX_EPT_EXTENT_GLOBAL:
 -      case VMX_EPT_EXTENT_CONTEXT:
        /*
 -       * TODO: Sync the necessary shadow EPT roots here, rather than
 -       * at the next emulated VM-entry.
 +       * Nested EPT roots are always held through guest_mmu,
 +       * not root_mmu.
         */
 +      mmu = &vcpu->arch.guest_mmu;
 +
 +      switch (type) {
 +      case VMX_EPT_EXTENT_CONTEXT:
 +              if (!nested_vmx_check_eptp(vcpu, operand.eptp))
 +                      return nested_vmx_failValid(vcpu,
 +                              VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
 +
 +              roots_to_free = 0;
 +              if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
 +                                          operand.eptp))
 +                      roots_to_free |= KVM_MMU_ROOT_CURRENT;
 +
 +              for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
 +                      if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
 +                                                  mmu->prev_roots[i].pgd,
 +                                                  operand.eptp))
 +                              roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
 +              }
 +              break;
 +      case VMX_EPT_EXTENT_GLOBAL:
 +              roots_to_free = KVM_MMU_ROOTS_ALL;
                break;
        default:
-               BUG_ON(1);
+               BUG();
                break;
        }
  
 +      if (roots_to_free)
 +              kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
 +
        return nested_vmx_succeed(vcpu);
  }
  
@@@ -5282,11 -5208,11 +5282,11 @@@ static int handle_invvpid(struct kvm_vc
        /* according to the intel vmx instruction reference, the memory
         * operand is read even if it isn't needed (e.g., for type==global)
         */
 -      if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 +      if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
                        vmx_instruction_info, false, sizeof(operand), &gva))
                return 1;
        if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
 -              kvm_inject_page_fault(vcpu, &e);
 +              kvm_inject_emulated_page_fault(vcpu, &e);
                return 1;
        }
        if (operand.vpid >> 16)
                    is_noncanonical_address(operand.gla, vcpu))
                        return nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
 -              if (cpu_has_vmx_invvpid_individual_addr()) {
 -                      __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
 -                              vpid02, operand.gla);
 -              } else
 -                      __vmx_flush_tlb(vcpu, vpid02, false);
 +              vpid_sync_vcpu_addr(vpid02, operand.gla);
                break;
        case VMX_VPID_EXTENT_SINGLE_CONTEXT:
        case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
                if (!operand.vpid)
                        return nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
 -              __vmx_flush_tlb(vcpu, vpid02, false);
 +              vpid_sync_context(vpid02);
                break;
        case VMX_VPID_EXTENT_ALL_CONTEXT:
 -              __vmx_flush_tlb(vcpu, vpid02, false);
 +              vpid_sync_context(vpid02);
                break;
        default:
                WARN_ON_ONCE(1);
                return kvm_skip_emulated_instruction(vcpu);
        }
  
 +      /*
 +       * Sync the shadow page tables if EPT is disabled, L1 is invalidating
 +       * linear mappings for L2 (tagged with L2's VPID).  Free all roots as
 +       * VPIDs are not tracked in the MMU role.
 +       *
 +       * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share
 +       * an MMU when EPT is disabled.
 +       *
 +       * TODO: sync only the affected SPTEs for INDIVIDUAL_ADDR.
 +       */
 +      if (!enable_ept)
 +              kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu,
 +                                 KVM_MMU_ROOTS_ALL);
 +
        return nested_vmx_succeed(vcpu);
  }
  
@@@ -5411,8 -5327,8 +5411,8 @@@ static int handle_vmfunc(struct kvm_vcp
  
  fail:
        nested_vmx_vmexit(vcpu, vmx->exit_reason,
 -                        vmcs_read32(VM_EXIT_INTR_INFO),
 -                        vmcs_readl(EXIT_QUALIFICATION));
 +                        vmx_get_intr_info(vcpu),
 +                        vmx_get_exit_qual(vcpu));
        return 1;
  }
  
@@@ -5463,7 -5379,7 +5463,7 @@@ static bool nested_vmx_exit_handled_io(
        if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
                return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
  
 -      exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      exit_qualification = vmx_get_exit_qual(vcpu);
  
        port = exit_qualification >> 16;
        size = (exit_qualification & 7) + 1;
@@@ -5517,7 -5433,7 +5517,7 @@@ static bool nested_vmx_exit_handled_msr
  static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
        struct vmcs12 *vmcs12)
  {
 -      unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
        int cr = exit_qualification & 15;
        int reg;
        unsigned long val;
                                return true;
                        break;
                case 3:
 -                      if ((vmcs12->cr3_target_count >= 1 &&
 -                                      vmcs12->cr3_target_value0 == val) ||
 -                              (vmcs12->cr3_target_count >= 2 &&
 -                                      vmcs12->cr3_target_value1 == val) ||
 -                              (vmcs12->cr3_target_count >= 3 &&
 -                                      vmcs12->cr3_target_value2 == val) ||
 -                              (vmcs12->cr3_target_count >= 4 &&
 -                                      vmcs12->cr3_target_value3 == val))
 -                              return false;
                        if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
                                return true;
                        break;
@@@ -5626,85 -5551,49 +5626,85 @@@ static bool nested_vmx_exit_handled_mtf
  }
  
  /*
 - * Return true if we should exit from L2 to L1 to handle an exit, or false if we
 - * should handle it ourselves in L0 (and then continue L2). Only call this
 - * when in is_guest_mode (L2).
 + * Return true if L0 wants to handle an exit from L2 regardless of whether or not
 + * L1 wants the exit.  Only call this when in is_guest_mode (L2).
   */
 -bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 +static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
  {
 -      u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 -      struct vcpu_vmx *vmx = to_vmx(vcpu);
 -      struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 -
 -      WARN_ON_ONCE(vmx->nested.nested_run_pending);
 -
 -      if (unlikely(vmx->fail)) {
 -              trace_kvm_nested_vmenter_failed(
 -                      "hardware VM-instruction error: ",
 -                      vmcs_read32(VM_INSTRUCTION_ERROR));
 -              return true;
 -      }
 -
 -      trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
 -                              vmcs_readl(EXIT_QUALIFICATION),
 -                              vmx->idt_vectoring_info,
 -                              intr_info,
 -                              vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
 -                              KVM_ISA_VMX);
 +      u32 intr_info;
  
        switch (exit_reason) {
        case EXIT_REASON_EXCEPTION_NMI:
 +              intr_info = vmx_get_intr_info(vcpu);
                if (is_nmi(intr_info))
 -                      return false;
 +                      return true;
                else if (is_page_fault(intr_info))
 -                      return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
 +                      return vcpu->arch.apf.host_apf_reason || !enable_ept;
                else if (is_debug(intr_info) &&
                         vcpu->guest_debug &
                         (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
 -                      return false;
 +                      return true;
                else if (is_breakpoint(intr_info) &&
                         vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
 -                      return false;
 +                      return true;
 +              return false;
 +      case EXIT_REASON_EXTERNAL_INTERRUPT:
 +              return true;
 +      case EXIT_REASON_MCE_DURING_VMENTRY:
 +              return true;
 +      case EXIT_REASON_EPT_VIOLATION:
 +              /*
 +               * L0 always deals with the EPT violation. If nested EPT is
 +               * used, and the nested mmu code discovers that the address is
 +               * missing in the guest EPT table (EPT12), the EPT violation
 +               * will be injected with nested_ept_inject_page_fault()
 +               */
 +              return true;
 +      case EXIT_REASON_EPT_MISCONFIG:
 +              /*
 +               * L2 never directly uses L1's EPT, but rather L0's own EPT
 +               * table (shadow on EPT) or a merged EPT table that L0 built
 +               * (EPT on EPT). So any problems with the structure of the
 +               * table are L0's fault.
 +               */
 +              return true;
 +      case EXIT_REASON_PREEMPTION_TIMER:
 +              return true;
 +      case EXIT_REASON_PML_FULL:
 +              /* We emulate PML support to L1. */
 +              return true;
 +      case EXIT_REASON_VMFUNC:
 +              /* VM functions are emulated through L2->L0 vmexits. */
 +              return true;
 +      case EXIT_REASON_ENCLS:
 +              /* SGX is never exposed to L1 */
 +              return true;
 +      default:
 +              break;
 +      }
 +      return false;
 +}
 +
 +/*
 + * Return true if L1 wants to intercept an exit from L2.  Only call this when in
 + * is_guest_mode (L2).
 + */
 +static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
 +{
 +      struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 +      u32 intr_info;
 +
 +      switch (exit_reason) {
 +      case EXIT_REASON_EXCEPTION_NMI:
 +              intr_info = vmx_get_intr_info(vcpu);
 +              if (is_nmi(intr_info))
 +                      return true;
 +              else if (is_page_fault(intr_info))
 +                      return true;
                return vmcs12->exception_bitmap &
                                (1u << (intr_info & INTR_INFO_VECTOR_MASK));
        case EXIT_REASON_EXTERNAL_INTERRUPT:
 -              return false;
 +              return nested_exit_on_intr(vcpu);
        case EXIT_REASON_TRIPLE_FAULT:
                return true;
        case EXIT_REASON_INTERRUPT_WINDOW:
                        nested_cpu_has2(vmcs12,
                                SECONDARY_EXEC_PAUSE_LOOP_EXITING);
        case EXIT_REASON_MCE_DURING_VMENTRY:
 -              return false;
 +              return true;
        case EXIT_REASON_TPR_BELOW_THRESHOLD:
                return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
        case EXIT_REASON_APIC_ACCESS:
                 * delivery" only come from vmcs12.
                 */
                return true;
 -      case EXIT_REASON_EPT_VIOLATION:
 -              /*
 -               * L0 always deals with the EPT violation. If nested EPT is
 -               * used, and the nested mmu code discovers that the address is
 -               * missing in the guest EPT table (EPT12), the EPT violation
 -               * will be injected with nested_ept_inject_page_fault()
 -               */
 -              return false;
 -      case EXIT_REASON_EPT_MISCONFIG:
 -              /*
 -               * L2 never uses directly L1's EPT, but rather L0's own EPT
 -               * table (shadow on EPT) or a merged EPT table that L0 built
 -               * (EPT on EPT). So any problems with the structure of the
 -               * table is L0's fault.
 -               */
 -              return false;
        case EXIT_REASON_INVPCID:
                return
                        nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
                 * the XSS exit bitmap in vmcs12.
                 */
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 -      case EXIT_REASON_PREEMPTION_TIMER:
 -              return false;
 -      case EXIT_REASON_PML_FULL:
 -              /* We emulate PML support to L1. */
 -              return false;
 -      case EXIT_REASON_VMFUNC:
 -              /* VM functions are emulated through L2->L0 vmexits. */
 -              return false;
 -      case EXIT_REASON_ENCLS:
 -              /* SGX is never exposed to L1 */
 -              return false;
        case EXIT_REASON_UMWAIT:
        case EXIT_REASON_TPAUSE:
                return nested_cpu_has2(vmcs12,
        }
  }
  
 +/*
 + * Conditionally reflect a VM-Exit into L1.  Returns %true if the VM-Exit was
 + * reflected into L1.
 + */
 +bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
 +{
 +      struct vcpu_vmx *vmx = to_vmx(vcpu);
 +      u32 exit_reason = vmx->exit_reason;
 +      unsigned long exit_qual;
 +      u32 exit_intr_info;
 +
 +      WARN_ON_ONCE(vmx->nested.nested_run_pending);
 +
 +      /*
 +       * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM
 +       * has already loaded L2's state.
 +       */
 +      if (unlikely(vmx->fail)) {
 +              trace_kvm_nested_vmenter_failed(
 +                      "hardware VM-instruction error: ",
 +                      vmcs_read32(VM_INSTRUCTION_ERROR));
 +              exit_intr_info = 0;
 +              exit_qual = 0;
 +              goto reflect_vmexit;
 +      }
 +
 +      exit_intr_info = vmx_get_intr_info(vcpu);
 +      exit_qual = vmx_get_exit_qual(vcpu);
 +
 +      trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, exit_qual,
 +                              vmx->idt_vectoring_info, exit_intr_info,
 +                              vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
 +                              KVM_ISA_VMX);
 +
 +      /* If L0 (KVM) wants the exit, it trumps L1's desires. */
 +      if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
 +              return false;
 +
 +      /* If L1 doesn't want the exit, handle it in L0. */
 +      if (!nested_vmx_l1_wants_exit(vcpu, exit_reason))
 +              return false;
 +
 +      /*
 +       * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits.  For
 +       * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would
 +       * need to be synthesized by querying the in-kernel LAPIC, but external
 +       * interrupts are never reflected to L1 so it's a non-issue.
 +       */
 +      if ((exit_intr_info &
 +           (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
 +          (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) {
 +              struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 +
 +              vmcs12->vm_exit_intr_error_code =
 +                      vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 +      }
 +
 +reflect_vmexit:
 +      nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, exit_qual);
 +      return true;
 +}
  
  static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
                                struct kvm_nested_state __user *user_kvm_nested_state,
@@@ -6176,7 -6031,7 +6176,7 @@@ void nested_vmx_setup_ctls_msrs(struct 
         * reason is that if one of these bits is necessary, it will appear
         * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
         * fields of vmcs01 and vmcs02, will turn these bits off - and
 -       * nested_vmx_exit_reflected() will not pass related exits to L1.
 +       * nested_vmx_l1_wants_exit() will not pass related exits to L1.
         * These rules have exceptions below.
         */
  
@@@ -6441,14 -6296,12 +6441,14 @@@ __init int nested_vmx_hardware_setup(st
        exit_handlers[EXIT_REASON_INVVPID]      = handle_invvpid;
        exit_handlers[EXIT_REASON_VMFUNC]       = handle_vmfunc;
  
 -      ops->check_nested_events = vmx_check_nested_events;
 -      ops->get_nested_state = vmx_get_nested_state;
 -      ops->set_nested_state = vmx_set_nested_state;
 -      ops->get_vmcs12_pages = nested_get_vmcs12_pages;
 -      ops->nested_enable_evmcs = nested_enable_evmcs;
 -      ops->nested_get_evmcs_version = nested_get_evmcs_version;
 -
        return 0;
  }
 +
 +struct kvm_x86_nested_ops vmx_nested_ops = {
 +      .check_events = vmx_check_nested_events,
 +      .get_state = vmx_get_nested_state,
 +      .set_state = vmx_set_nested_state,
 +      .get_vmcs12_pages = nested_get_vmcs12_pages,
 +      .enable_evmcs = nested_enable_evmcs,
 +      .get_evmcs_version = nested_get_evmcs_version,
 +};
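
The INVEPT single-context handling added to handle_invept() above matches cached roots against the guest's EPTP using only the physical-address bits, via EPTP_PA_MASK. A minimal stand-alone sketch of that comparison (illustrative only, not part of the patch; GENMASK_ULL() is open-coded for userspace and the example root address and attribute bits are hypothetical):

/*
 * Sketch: two EPTPs that differ only in the low attribute bits (memory
 * type, page-walk length, accessed/dirty enable) still match once both
 * are masked down to bits 51:12, i.e. EPTP_PA_MASK.
 */
#include <stdint.h>
#include <stdio.h>

#define EPTP_PA_MASK   ((((uint64_t)1 << 52) - 1) & ~(((uint64_t)1 << 12) - 1))

static int eptp_pa_matches(uint64_t a, uint64_t b)
{
        return (a & EPTP_PA_MASK) == (b & EPTP_PA_MASK);
}

int main(void)
{
        uint64_t root_pa    = 0x123456000ULL;   /* hypothetical page-aligned root */
        uint64_t eptp_ad    = root_pa | 0x5e;   /* WB, 4-level walk, A/D enabled */
        uint64_t eptp_no_ad = root_pa | 0x1e;   /* same root, A/D disabled */

        printf("%d\n", eptp_pa_matches(eptp_ad, eptp_no_ad));  /* prints 1 */
        return 0;
}
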
diff --combined arch/x86/kvm/vmx/vmx.c
index 455cd2c8dbce071252d647e3ad81950185a94816,89c766fad889ea2581678d1cd966dea556418529..46aa3ca019290199fdff520650bf91816ed20a4c
@@@ -437,11 -437,6 +437,11 @@@ static const struct kvm_vmx_segment_fie
        VMX_SEGMENT_FIELD(LDTR),
  };
  
 +static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
 +{
 +      vmx->segment_cache.bitmask = 0;
 +}
 +
  static unsigned long host_idt_base;
  
  /*
@@@ -1343,10 -1338,6 +1343,10 @@@ void vmx_vcpu_load_vmcs(struct kvm_vcp
                void *gdt = get_current_gdt_ro();
                unsigned long sysenter_esp;
  
 +              /*
 +               * Flush all EPTP/VPID contexts, the new pCPU may have stale
 +               * TLB entries from its previous association with the vCPU.
 +               */
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
  
                /*
@@@ -1381,7 -1372,6 +1381,6 @@@ void vmx_vcpu_load(struct kvm_vcpu *vcp
  
        vmx_vcpu_pi_load(vcpu, cpu);
  
-       vmx->host_pkru = read_pkru();
        vmx->host_debugctlmsr = get_debugctlmsr();
  }
  
@@@ -2847,64 -2837,18 +2846,64 @@@ static void exit_lmode(struct kvm_vcpu 
  
  #endif
  
 -static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
 +static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
 +{
 +      struct vcpu_vmx *vmx = to_vmx(vcpu);
 +
 +      /*
 +       * INVEPT must be issued when EPT is enabled, irrespective of VPID, as
 +       * the CPU is not required to invalidate guest-physical mappings on
 +       * VM-Entry, even if VPID is disabled.  Guest-physical mappings are
 +       * associated with the root EPT structure and not any particular VPID
 +       * (INVVPID also isn't required to invalidate guest-physical mappings).
 +       */
 +      if (enable_ept) {
 +              ept_sync_global();
 +      } else if (enable_vpid) {
 +              if (cpu_has_vmx_invvpid_global()) {
 +                      vpid_sync_vcpu_global();
 +              } else {
 +                      vpid_sync_vcpu_single(vmx->vpid);
 +                      vpid_sync_vcpu_single(vmx->nested.vpid02);
 +              }
 +      }
 +}
 +
 +static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
  {
 -      int vpid = to_vmx(vcpu)->vpid;
 +      u64 root_hpa = vcpu->arch.mmu->root_hpa;
 +
 +      /* No flush required if the current context is invalid. */
 +      if (!VALID_PAGE(root_hpa))
 +              return;
 +
 +      if (enable_ept)
 +              ept_sync_context(construct_eptp(vcpu, root_hpa));
 +      else if (!is_guest_mode(vcpu))
 +              vpid_sync_context(to_vmx(vcpu)->vpid);
 +      else
 +              vpid_sync_context(nested_get_vpid02(vcpu));
 +}
  
 -      if (!vpid_sync_vcpu_addr(vpid, addr))
 -              vpid_sync_context(vpid);
 +static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
 +{
 +      /*
 +       * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0; see the comment in
 +       * vmx_flush_tlb_guest() for an explanation of why this is ok.
 +       */
 +      vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr);
 +}
  
 +static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
 +{
        /*
 -       * If VPIDs are not supported or enabled, then the above is a no-op.
 -       * But we don't really need a TLB flush in that case anyway, because
 -       * each VM entry/exit includes an implicit flush when VPID is 0.
 +       * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
 +       * or a vpid couldn't be allocated for this vCPU.  VM-Enter and VM-Exit
 +       * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
 +       * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
 +       * i.e. no explicit INVVPID is necessary.
         */
 +      vpid_sync_context(to_vmx(vcpu)->vpid);
  }
  
  static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@@ -2942,13 -2886,12 +2941,13 @@@ void ept_save_pdptrs(struct kvm_vcpu *v
  {
        struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
  
 -      if (is_pae_paging(vcpu)) {
 -              mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
 -              mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
 -              mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
 -              mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
 -      }
 +      if (WARN_ON_ONCE(!is_pae_paging(vcpu)))
 +              return;
 +
 +      mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
 +      mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
 +      mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
 +      mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
  
        kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
  }
@@@ -3040,15 -2983,16 +3039,15 @@@ u64 construct_eptp(struct kvm_vcpu *vcp
        return eptp;
  }
  
 -void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3)
 +void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd)
  {
        struct kvm *kvm = vcpu->kvm;
        bool update_guest_cr3 = true;
        unsigned long guest_cr3;
        u64 eptp;
  
 -      guest_cr3 = cr3;
        if (enable_ept) {
 -              eptp = construct_eptp(vcpu, cr3);
 +              eptp = construct_eptp(vcpu, pgd);
                vmcs_write64(EPT_POINTER, eptp);
  
                if (kvm_x86_ops.tlb_remote_flush) {
                else /* vmcs01.GUEST_CR3 is already up-to-date. */
                        update_guest_cr3 = false;
                ept_load_pdptrs(vcpu);
 +      } else {
 +              guest_cr3 = pgd;
        }
  
        if (update_guest_cr3)
@@@ -4645,6 -4587,26 +4644,26 @@@ static int handle_machine_check(struct 
        return 1;
  }
  
+ /*
+  * If the host has split lock detection disabled, then #AC is
+  * unconditionally injected into the guest, which is the pre split lock
+  * detection behaviour.
+  *
+  * If the host has split lock detection enabled then #AC is
+  * only injected into the guest when:
+  *  - Guest CPL == 3 (user mode)
+  *  - Guest has #AC detection enabled in CR0
+  *  - Guest EFLAGS has AC bit set
+  */
+ static inline bool guest_inject_ac(struct kvm_vcpu *vcpu)
+ {
+       if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+               return true;
+       return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
+              (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
+ }
  static int handle_exception_nmi(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        }
  
        if (is_page_fault(intr_info)) {
 -              cr2 = vmcs_readl(EXIT_QUALIFICATION);
 +              cr2 = vmx_get_exit_qual(vcpu);
                /* EPT won't cause page fault directly */
                WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
                return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
                return handle_rmode_exception(vcpu, ex_no, error_code);
  
        switch (ex_no) {
-       case AC_VECTOR:
-               kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
-               return 1;
        case DB_VECTOR:
 -              dr6 = vmcs_readl(EXIT_QUALIFICATION);
 +              dr6 = vmx_get_exit_qual(vcpu);
                if (!(vcpu->guest_debug &
                      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
-                       vcpu->arch.dr6 &= ~DR_TRAP_BITS;
-                       vcpu->arch.dr6 |= dr6 | DR6_RTM;
                        if (is_icebp(intr_info))
                                WARN_ON(!skip_emulated_instruction(vcpu));
  
-                       kvm_queue_exception(vcpu, DB_VECTOR);
+                       kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
                        return 1;
                }
-               kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
+               kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
                kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
                /* fall through */
        case BP_VECTOR:
                kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
                kvm_run->debug.arch.exception = ex_no;
                break;
+       case AC_VECTOR:
+               if (guest_inject_ac(vcpu)) {
+                       kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+                       return 1;
+               }
+               /*
+                * Handle split lock. Depending on detection mode this will
+                * either warn and disable split lock detection for this
+                * task or force SIGBUS on it.
+                */
+               if (handle_guest_split_lock(kvm_rip_read(vcpu)))
+                       return 1;
+               fallthrough;
        default:
                kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
                kvm_run->ex.exception = ex_no;
@@@ -4769,7 -4740,7 +4797,7 @@@ static int handle_io(struct kvm_vcpu *v
        int size, in, string;
        unsigned port;
  
 -      exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      exit_qualification = vmx_get_exit_qual(vcpu);
        string = (exit_qualification & 16) != 0;
  
        ++vcpu->stat.io_exits;
@@@ -4860,7 -4831,7 +4888,7 @@@ static int handle_cr(struct kvm_vcpu *v
        int err;
        int ret;
  
 -      exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      exit_qualification = vmx_get_exit_qual(vcpu);
        cr = exit_qualification & 15;
        reg = (exit_qualification >> 8) & 15;
        switch ((exit_qualification >> 4) & 3) {
@@@ -4937,7 -4908,7 +4965,7 @@@ static int handle_dr(struct kvm_vcpu *v
        unsigned long exit_qualification;
        int dr, dr7, reg;
  
 -      exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      exit_qualification = vmx_get_exit_qual(vcpu);
        dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
  
        /* First, if DR does not exist, trigger UD */
                 * guest debugging itself.
                 */
                if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
-                       vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
+                       vcpu->run->debug.arch.dr6 = DR6_BD | DR6_RTM | DR6_FIXED_1;
                        vcpu->run->debug.arch.dr7 = dr7;
                        vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
                        vcpu->run->debug.arch.exception = DB_VECTOR;
                        vcpu->run->exit_reason = KVM_EXIT_DEBUG;
                        return 0;
                } else {
-                       vcpu->arch.dr6 &= ~DR_TRAP_BITS;
-                       vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
-                       kvm_queue_exception(vcpu, DB_VECTOR);
+                       kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BD);
                        return 1;
                }
        }
        return kvm_skip_emulated_instruction(vcpu);
  }
  
- static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
- {
-       return vcpu->arch.dr6;
- }
- static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
- {
- }
  static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
  {
        get_debugreg(vcpu->arch.db[0], 0);
@@@ -5050,7 -5010,7 +5067,7 @@@ static int handle_invd(struct kvm_vcpu 
  
  static int handle_invlpg(struct kvm_vcpu *vcpu)
  {
 -      unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
  
        kvm_mmu_invlpg(vcpu, exit_qualification);
        return kvm_skip_emulated_instruction(vcpu);
@@@ -5082,7 -5042,7 +5099,7 @@@ static int handle_xsetbv(struct kvm_vcp
  static int handle_apic_access(struct kvm_vcpu *vcpu)
  {
        if (likely(fasteoi)) {
 -              unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +              unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
                int access_type, offset;
  
                access_type = exit_qualification & APIC_ACCESS_TYPE;
  
  static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
  {
 -      unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
        int vector = exit_qualification & 0xff;
  
        /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
  
  static int handle_apic_write(struct kvm_vcpu *vcpu)
  {
 -      unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
        u32 offset = exit_qualification & 0xfff;
  
        /* APIC-write VM exit is trap-like and thus no need to adjust IP */
@@@ -5134,7 -5094,7 +5151,7 @@@ static int handle_task_switch(struct kv
        idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
        type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
  
 -      exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      exit_qualification = vmx_get_exit_qual(vcpu);
  
        reason = (u32)exit_qualification >> 30;
        if (reason == TASK_SWITCH_GATE && idt_v) {
@@@ -5184,7 -5144,7 +5201,7 @@@ static int handle_ept_violation(struct 
        gpa_t gpa;
        u64 error_code;
  
 -      exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      exit_qualification = vmx_get_exit_qual(vcpu);
  
        /*
         * EPT violation happened while executing iret from NMI,
@@@ -5444,13 -5404,13 +5461,13 @@@ static int handle_invpcid(struct kvm_vc
        /* According to the Intel instruction reference, the memory operand
         * is read even if it isn't needed (e.g., for type==all)
         */
 -      if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 +      if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
                                vmx_instruction_info, false,
                                sizeof(operand), &gva))
                return 1;
  
        if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
 -              kvm_inject_page_fault(vcpu, &e);
 +              kvm_inject_emulated_page_fault(vcpu, &e);
                return 1;
        }
  
  
                if (kvm_get_active_pcid(vcpu) == operand.pcid) {
                        kvm_mmu_sync_roots(vcpu);
 -                      kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 +                      kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
                }
  
                for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
 -                      if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
 +                      if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
                            == operand.pcid)
                                roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
  
@@@ -5520,7 -5480,7 +5537,7 @@@ static int handle_pml_full(struct kvm_v
  
        trace_kvm_pml_full(vcpu->vcpu_id);
  
 -      exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 +      exit_qualification = vmx_get_exit_qual(vcpu);
  
        /*
         * PML buffer FULL happened while executing iret from NMI,
@@@ -5634,8 -5594,8 +5651,8 @@@ static const int kvm_vmx_max_exit_handl
  
  static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
  {
 -      *info1 = vmcs_readl(EXIT_QUALIFICATION);
 -      *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 +      *info1 = vmx_get_exit_qual(vcpu);
 +      *info2 = vmx_get_intr_info(vcpu);
  }
  
  static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
@@@ -5717,6 -5677,7 +5734,6 @@@ void dump_vmcs(void
        u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
        unsigned long cr4;
        u64 efer;
 -      int i, n;
  
        if (!dump_invalid_vmcs) {
                pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
                pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
        if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
                pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
 -      n = vmcs_read32(CR3_TARGET_COUNT);
 -      for (i = 0; i + 1 < n; i += 4)
 -              pr_err("CR3 target%u=%016lx target%u=%016lx\n",
 -                     i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
 -                     i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
 -      if (i < n)
 -              pr_err("CR3 target%u=%016lx\n",
 -                     i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
        if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
                pr_err("PLE Gap=%08x Window=%08x\n",
                       vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
@@@ -5902,8 -5871,8 +5919,8 @@@ static int vmx_handle_exit(struct kvm_v
                 */
                nested_mark_vmcs12_pages_dirty(vcpu);
  
 -              if (nested_vmx_exit_reflected(vcpu, exit_reason))
 -                      return nested_vmx_reflect_vmexit(vcpu, exit_reason);
 +              if (nested_vmx_reflect_vmexit(vcpu))
 +                      return 1;
        }
  
        if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
@@@ -6124,15 -6093,7 +6141,15 @@@ void vmx_set_virtual_apic_mode(struct k
                if (flexpriority_enabled) {
                        sec_exec_control |=
                                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 -                      vmx_flush_tlb(vcpu, true);
 +                      kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 +
 +                      /*
 +                       * Flush the TLB; reloading the APIC access page will
 +                       * only do so if its physical address has changed, but
 +                       * the guest may have inserted a non-APIC mapping into
 +                       * the TLB while the APIC access page was disabled.
 +                       */
 +                      kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
                }
                break;
        case LAPIC_MODE_X2APIC:
        vmx_update_msr_bitmap(vcpu);
  }
  
 -static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
 +static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
  {
 -      if (!is_guest_mode(vcpu)) {
 -              vmcs_write64(APIC_ACCESS_ADDR, hpa);
 -              vmx_flush_tlb(vcpu, true);
 +      struct page *page;
 +
 +      /* Defer reload until vmcs01 is the current VMCS. */
 +      if (is_guest_mode(vcpu)) {
 +              to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true;
 +              return;
        }
 +
 +      if (!(secondary_exec_controls_get(to_vmx(vcpu)) &
 +          SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
 +              return;
 +
 +      page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
 +      if (is_error_page(page))
 +              return;
 +
 +      vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
 +      vmx_flush_tlb_current(vcpu);
 +
 +      /*
 +       * Do not pin apic access page in memory, the MMU notifier
 +       * will call us again if it is migrated or swapped out.
 +       */
 +      put_page(page);
  }
  
  static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
@@@ -6289,16 -6230,16 +6306,16 @@@ static void vmx_apicv_post_state_restor
  
  static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
  {
 -      vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 +      u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
  
        /* if exit due to PF check for async PF */
 -      if (is_page_fault(vmx->exit_intr_info)) {
 +      if (is_page_fault(intr_info)) {
                vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
        /* Handle machine checks before interrupts are enabled */
 -      } else if (is_machine_check(vmx->exit_intr_info)) {
 +      } else if (is_machine_check(intr_info)) {
                kvm_machine_check();
        /* We need to handle NMIs before interrupts are enabled */
 -      } else if (is_nmi(vmx->exit_intr_info)) {
 +      } else if (is_nmi(intr_info)) {
                kvm_before_interrupt(&vmx->vcpu);
                asm("int $2");
                kvm_after_interrupt(&vmx->vcpu);
@@@ -6313,8 -6254,9 +6330,8 @@@ static void handle_external_interrupt_i
        unsigned long tmp;
  #endif
        gate_desc *desc;
 -      u32 intr_info;
 +      u32 intr_info = vmx_get_intr_info(vcpu);
  
 -      intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
        if (WARN_ONCE(!is_external_intr(intr_info),
            "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
                return;
  }
  STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
  
 -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
 -      enum exit_fastpath_completion *exit_fastpath)
 +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
                handle_external_interrupt_irqoff(vcpu);
        else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
                handle_exception_nmi_irqoff(vmx);
 -      else if (!is_guest_mode(vcpu) &&
 -              vmx->exit_reason == EXIT_REASON_MSR_WRITE)
 -              *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
  }
  
  static bool vmx_has_emulated_msr(int index)
@@@ -6391,8 -6337,11 +6408,8 @@@ static void vmx_recover_nmi_blocking(st
        if (enable_vnmi) {
                if (vmx->loaded_vmcs->nmi_known_unmasked)
                        return;
 -              /*
 -               * Can't use vmx->exit_intr_info since we're not sure what
 -               * the exit reason is.
 -               */
 -              exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 +
 +              exit_intr_info = vmx_get_intr_info(&vmx->vcpu);
                unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
                vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
                /*
@@@ -6561,9 -6510,8 +6578,9 @@@ void vmx_update_host_rsp(struct vcpu_vm
  
  bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
  
 -static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 +static enum exit_fastpath_completion vmx_vcpu_run(struct kvm_vcpu *vcpu)
  {
 +      enum exit_fastpath_completion exit_fastpath;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long cr3, cr4;
  
        /* Don't enter VMX if guest state is invalid, let the exit handler
           start emulation until we arrive back to a valid state */
        if (vmx->emulation_required)
 -              return;
 +              return EXIT_FASTPATH_NONE;
  
        if (vmx->ple_window_dirty) {
                vmx->ple_window_dirty = false;
  
        kvm_load_guest_xsave_state(vcpu);
  
-       if (static_cpu_has(X86_FEATURE_PKU) &&
-           kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
-           vcpu->arch.pkru != vmx->host_pkru)
-               __write_pkru(vcpu->arch.pkru);
        pt_guest_enter(vmx);
  
        if (vcpu_to_pmu(vcpu)->version)
        loadsegment(es, __USER_DS);
  #endif
  
 -      vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
 -                                | (1 << VCPU_EXREG_RFLAGS)
 -                                | (1 << VCPU_EXREG_PDPTR)
 -                                | (1 << VCPU_EXREG_SEGMENTS)
 -                                | (1 << VCPU_EXREG_CR3));
 -      vcpu->arch.regs_dirty = 0;
 +      vmx_register_cache_reset(vcpu);
  
        pt_guest_exit(vmx);
  
-       /*
-        * eager fpu is enabled if PKEY is supported and CR4 is switched
-        * back on host, so it is safe to read guest PKRU from current
-        * XSAVE.
-        */
-       if (static_cpu_has(X86_FEATURE_PKU) &&
-           kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
-               vcpu->arch.pkru = rdpkru();
-               if (vcpu->arch.pkru != vmx->host_pkru)
-                       __write_pkru(vmx->host_pkru);
-       }
        kvm_load_host_xsave_state(vcpu);
  
        vmx->nested.nested_run_pending = 0;
        vmx->idt_vectoring_info = 0;
  
 -      vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
 -      if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
 +      if (unlikely(vmx->fail)) {
 +              vmx->exit_reason = 0xdead;
 +              return EXIT_FASTPATH_NONE;
 +      }
 +
 +      vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
 +      if (unlikely((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY))
                kvm_machine_check();
  
 -      if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
 -              return;
 +      if (unlikely(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
 +              return EXIT_FASTPATH_NONE;
 +
 +      if (!is_guest_mode(vcpu) && vmx->exit_reason == EXIT_REASON_MSR_WRITE)
 +              exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
 +      else
 +              exit_fastpath = EXIT_FASTPATH_NONE;
  
        vmx->loaded_vmcs->launched = 1;
        vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
  
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
 +
 +      return exit_fastpath;
  }
  
  static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
@@@ -7785,18 -7709,14 +7785,16 @@@ static struct kvm_x86_ops vmx_x86_ops _
        .set_idt = vmx_set_idt,
        .get_gdt = vmx_get_gdt,
        .set_gdt = vmx_set_gdt,
-       .get_dr6 = vmx_get_dr6,
-       .set_dr6 = vmx_set_dr6,
        .set_dr7 = vmx_set_dr7,
        .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
        .cache_reg = vmx_cache_reg,
        .get_rflags = vmx_get_rflags,
        .set_rflags = vmx_set_rflags,
  
 -      .tlb_flush = vmx_flush_tlb,
 +      .tlb_flush_all = vmx_flush_tlb_all,
 +      .tlb_flush_current = vmx_flush_tlb_current,
        .tlb_flush_gva = vmx_flush_tlb_gva,
 +      .tlb_flush_guest = vmx_flush_tlb_guest,
  
        .run = vmx_vcpu_run,
        .handle_exit = vmx_handle_exit,
        .post_block = vmx_post_block,
  
        .pmu_ops = &intel_pmu_ops,
 +      .nested_ops = &vmx_nested_ops,
  
        .update_pi_irte = vmx_update_pi_irte,
  
        .pre_leave_smm = vmx_pre_leave_smm,
        .enable_smi_window = enable_smi_window,
  
 -      .check_nested_events = NULL,
 -      .get_nested_state = NULL,
 -      .set_nested_state = NULL,
 -      .get_vmcs12_pages = NULL,
 -      .nested_enable_evmcs = NULL,
 -      .nested_get_evmcs_version = NULL,
        .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
        .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
  };
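
The #AC changes to handle_exception_nmi() above decide whether a guest #AC is a genuine alignment check (forward it to the guest) or a host split-lock event (let handle_guest_split_lock() deal with it). A stand-alone sketch of the guest_inject_ac() predicate (illustrative only, not part of the patch; the parameters stand in for boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT), vmx_get_cpl(), CR0.AM and RFLAGS.AC):

#include <stdbool.h>

/*
 * Sketch of the injection decision: without host split-lock detection
 * every #AC goes to the guest (legacy behaviour); with it, only a
 * user-mode fault with CR0.AM and RFLAGS.AC set is forwarded, anything
 * else is treated as a split lock and handled on the host side.
 */
static bool guest_wants_ac(bool host_sld, int cpl, bool cr0_am, bool rflags_ac)
{
        if (!host_sld)
                return true;

        return cpl == 3 && cr0_am && rflags_ac;
}
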
diff --combined arch/x86/kvm/x86.c
index 8c0b77ac8dc6a7acff18c1b2ba2d7221bb79585f,d11eba8b85c63b3eeba51e88f40defa9e0a9dd98..542a00008caae6f8a45ff0f74b8bfa51e03068cc
@@@ -97,6 -97,9 +97,6 @@@ static u64 __read_mostly efer_reserved_
  
  static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
  
 -#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
 -#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
 -
  #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
                                      KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
  
@@@ -191,44 -194,45 +191,44 @@@ u64 __read_mostly supported_xss
  EXPORT_SYMBOL_GPL(supported_xss);
  
  struct kvm_stats_debugfs_item debugfs_entries[] = {
 -      { "pf_fixed", VCPU_STAT(pf_fixed) },
 -      { "pf_guest", VCPU_STAT(pf_guest) },
 -      { "tlb_flush", VCPU_STAT(tlb_flush) },
 -      { "invlpg", VCPU_STAT(invlpg) },
 -      { "exits", VCPU_STAT(exits) },
 -      { "io_exits", VCPU_STAT(io_exits) },
 -      { "mmio_exits", VCPU_STAT(mmio_exits) },
 -      { "signal_exits", VCPU_STAT(signal_exits) },
 -      { "irq_window", VCPU_STAT(irq_window_exits) },
 -      { "nmi_window", VCPU_STAT(nmi_window_exits) },
 -      { "halt_exits", VCPU_STAT(halt_exits) },
 -      { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
 -      { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
 -      { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
 -      { "halt_wakeup", VCPU_STAT(halt_wakeup) },
 -      { "hypercalls", VCPU_STAT(hypercalls) },
 -      { "request_irq", VCPU_STAT(request_irq_exits) },
 -      { "irq_exits", VCPU_STAT(irq_exits) },
 -      { "host_state_reload", VCPU_STAT(host_state_reload) },
 -      { "fpu_reload", VCPU_STAT(fpu_reload) },
 -      { "insn_emulation", VCPU_STAT(insn_emulation) },
 -      { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
 -      { "irq_injections", VCPU_STAT(irq_injections) },
 -      { "nmi_injections", VCPU_STAT(nmi_injections) },
 -      { "req_event", VCPU_STAT(req_event) },
 -      { "l1d_flush", VCPU_STAT(l1d_flush) },
 -      { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 -      { "mmu_pte_write", VM_STAT(mmu_pte_write) },
 -      { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
 -      { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
 -      { "mmu_flooded", VM_STAT(mmu_flooded) },
 -      { "mmu_recycled", VM_STAT(mmu_recycled) },
 -      { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
 -      { "mmu_unsync", VM_STAT(mmu_unsync) },
 -      { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 -      { "largepages", VM_STAT(lpages, .mode = 0444) },
 -      { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
 -      { "max_mmu_page_hash_collisions",
 -              VM_STAT(max_mmu_page_hash_collisions) },
 +      VCPU_STAT("pf_fixed", pf_fixed),
 +      VCPU_STAT("pf_guest", pf_guest),
 +      VCPU_STAT("tlb_flush", tlb_flush),
 +      VCPU_STAT("invlpg", invlpg),
 +      VCPU_STAT("exits", exits),
 +      VCPU_STAT("io_exits", io_exits),
 +      VCPU_STAT("mmio_exits", mmio_exits),
 +      VCPU_STAT("signal_exits", signal_exits),
 +      VCPU_STAT("irq_window", irq_window_exits),
 +      VCPU_STAT("nmi_window", nmi_window_exits),
 +      VCPU_STAT("halt_exits", halt_exits),
 +      VCPU_STAT("halt_successful_poll", halt_successful_poll),
 +      VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
 +      VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
 +      VCPU_STAT("halt_wakeup", halt_wakeup),
 +      VCPU_STAT("hypercalls", hypercalls),
 +      VCPU_STAT("request_irq", request_irq_exits),
 +      VCPU_STAT("irq_exits", irq_exits),
 +      VCPU_STAT("host_state_reload", host_state_reload),
 +      VCPU_STAT("fpu_reload", fpu_reload),
 +      VCPU_STAT("insn_emulation", insn_emulation),
 +      VCPU_STAT("insn_emulation_fail", insn_emulation_fail),
 +      VCPU_STAT("irq_injections", irq_injections),
 +      VCPU_STAT("nmi_injections", nmi_injections),
 +      VCPU_STAT("req_event", req_event),
 +      VCPU_STAT("l1d_flush", l1d_flush),
 +      VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
 +      VM_STAT("mmu_pte_write", mmu_pte_write),
 +      VM_STAT("mmu_pte_updated", mmu_pte_updated),
 +      VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
 +      VM_STAT("mmu_flooded", mmu_flooded),
 +      VM_STAT("mmu_recycled", mmu_recycled),
 +      VM_STAT("mmu_cache_miss", mmu_cache_miss),
 +      VM_STAT("mmu_unsync", mmu_unsync),
 +      VM_STAT("remote_tlb_flush", remote_tlb_flush),
 +      VM_STAT("largepages", lpages, .mode = 0444),
 +      VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
 +      VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
        { NULL }
  };
  
@@@ -568,11 -572,12 +568,12 @@@ void kvm_requeue_exception(struct kvm_v
  }
  EXPORT_SYMBOL_GPL(kvm_requeue_exception);
  
- static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
-                                 unsigned long payload)
+ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
+                          unsigned long payload)
  {
        kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
  }
+ EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
  
  static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
                                    u32 error_code, unsigned long payload)
@@@ -607,28 -612,15 +608,28 @@@ void kvm_inject_page_fault(struct kvm_v
  }
  EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
  
 -static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 +bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
 +                                  struct x86_exception *fault)
  {
 -      if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
 -              vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
 -      else
 -              vcpu->arch.mmu->inject_page_fault(vcpu, fault);
 +      struct kvm_mmu *fault_mmu;
 +      WARN_ON_ONCE(fault->vector != PF_VECTOR);
 +
 +      fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
 +                                             vcpu->arch.walk_mmu;
 +
 +      /*
 +       * Invalidate the TLB entry for the faulting address, if it exists,
 +       * else the access will fault indefinitely (and to emulate hardware).
 +       */
 +      if ((fault->error_code & PFERR_PRESENT_MASK) &&
 +          !(fault->error_code & PFERR_RSVD_MASK))
 +              kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
 +                                     fault_mmu->root_hpa);
  
 +      fault_mmu->inject_page_fault(vcpu, fault);
        return fault->nested_page_fault;
  }
 +EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
  
  void kvm_inject_nmi(struct kvm_vcpu *vcpu)
  {
@@@ -845,11 -837,25 +846,25 @@@ void kvm_load_guest_xsave_state(struct 
                    vcpu->arch.ia32_xss != host_xss)
                        wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
        }
+       if (static_cpu_has(X86_FEATURE_PKU) &&
+           (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
+            (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
+           vcpu->arch.pkru != vcpu->arch.host_pkru)
+               __write_pkru(vcpu->arch.pkru);
  }
  EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
  
  void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
  {
+       if (static_cpu_has(X86_FEATURE_PKU) &&
+           (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
+            (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
+               vcpu->arch.pkru = rdpkru();
+               if (vcpu->arch.pkru != vcpu->arch.host_pkru)
+                       __write_pkru(vcpu->arch.host_pkru);
+       }
        if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
  
                if (vcpu->arch.xcr0 != host_xcr0)
@@@ -935,19 -941,6 +950,6 @@@ EXPORT_SYMBOL_GPL(kvm_set_xcr)
        __reserved_bits;                                \
  })
  
- static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
- {
-       u64 reserved_bits = __cr4_reserved_bits(cpu_has, c);
-       if (kvm_cpu_cap_has(X86_FEATURE_LA57))
-               reserved_bits &= ~X86_CR4_LA57;
-       if (kvm_cpu_cap_has(X86_FEATURE_UMIP))
-               reserved_bits &= ~X86_CR4_UMIP;
-       return reserved_bits;
- }
  static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  {
        if (cr4 & cr4_reserved_bits)
@@@ -1015,7 -1008,7 +1017,7 @@@ int kvm_set_cr3(struct kvm_vcpu *vcpu, 
        if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
                if (!skip_tlb_flush) {
                        kvm_mmu_sync_roots(vcpu);
 -                      kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 +                      kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
                }
                return 0;
        }
                 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
                return 1;
  
 -      kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
 +      kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
        vcpu->arch.cr3 = cr3;
        kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
  
@@@ -1067,12 -1060,6 +1069,6 @@@ static void kvm_update_dr0123(struct kv
        }
  }
  
- static void kvm_update_dr6(struct kvm_vcpu *vcpu)
- {
-       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
-               kvm_x86_ops.set_dr6(vcpu, vcpu->arch.dr6);
- }
  static void kvm_update_dr7(struct kvm_vcpu *vcpu)
  {
        unsigned long dr7;
@@@ -1112,7 -1099,6 +1108,6 @@@ static int __kvm_set_dr(struct kvm_vcp
                if (val & 0xffffffff00000000ULL)
                        return -1; /* #GP */
                vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
-               kvm_update_dr6(vcpu);
                break;
        case 5:
                /* fall through */
@@@ -1148,10 -1134,7 +1143,7 @@@ int kvm_get_dr(struct kvm_vcpu *vcpu, i
        case 4:
                /* fall through */
        case 6:
-               if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-                       *val = vcpu->arch.dr6;
-               else
-                       *val = kvm_x86_ops.get_dr6(vcpu);
+               *val = vcpu->arch.dr6;
                break;
        case 5:
                /* fall through */
@@@ -2686,16 -2669,10 +2678,16 @@@ static void kvmclock_reset(struct kvm_v
        vcpu->arch.time = 0;
  }
  
 -static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
 +static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
  {
        ++vcpu->stat.tlb_flush;
 -      kvm_x86_ops.tlb_flush(vcpu, invalidate_gpa);
 +      kvm_x86_ops.tlb_flush_all(vcpu);
 +}
 +
 +static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 +{
 +      ++vcpu->stat.tlb_flush;
 +      kvm_x86_ops.tlb_flush_guest(vcpu);
  }
  
  static void record_steal_time(struct kvm_vcpu *vcpu)
        trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
                st->preempted & KVM_VCPU_FLUSH_TLB);
        if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
 -              kvm_vcpu_flush_tlb(vcpu, false);
 +              kvm_vcpu_flush_tlb_guest(vcpu);
  
        vcpu->arch.st.preempted = 0;
  
@@@ -3075,6 -3052,17 +3067,17 @@@ int kvm_get_msr_common(struct kvm_vcpu 
        case MSR_IA32_PERF_CTL:
        case MSR_AMD64_DC_CFG:
        case MSR_F15H_EX_CFG:
+       /*
+        * Intel Sandy Bridge CPUs must support the RAPL (running average power
+        * limit) MSRs. Just return 0, as we do not want to expose the host
+        * data here. Do not conditionalize this on CPUID, as KVM does not do
+        * so for existing CPU-specific MSRs.
+        */
+       case MSR_RAPL_POWER_UNIT:
+       case MSR_PP0_ENERGY_STATUS:     /* Power plane 0 (core) */
+       case MSR_PP1_ENERGY_STATUS:     /* Power plane 1 (graphics uncore) */
+       case MSR_PKG_ENERGY_STATUS:     /* Total package */
+       case MSR_DRAM_ENERGY_STATUS:    /* DRAM controller */
                msr_info->data = 0;
                break;
        case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
@@@ -3389,6 -3377,7 +3392,7 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_GET_MSR_FEATURES:
        case KVM_CAP_MSR_PLATFORM_INFO:
        case KVM_CAP_EXCEPTION_PAYLOAD:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_SYNC_REGS:
                r = KVM_X2APIC_API_VALID_FLAGS;
                break;
        case KVM_CAP_NESTED_STATE:
 -              r = kvm_x86_ops.get_nested_state ?
 -                      kvm_x86_ops.get_nested_state(NULL, NULL, 0) : 0;
 +              r = kvm_x86_ops.nested_ops->get_state ?
 +                      kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
                break;
        case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
                r = kvm_x86_ops.enable_direct_tlbflush != NULL;
                break;
        case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
 -              r = kvm_x86_ops.nested_enable_evmcs != NULL;
 +              r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
                break;
        default:
                break;
@@@ -3574,6 -3563,9 +3578,9 @@@ void kvm_arch_vcpu_load(struct kvm_vcp
  
        kvm_x86_ops.vcpu_load(vcpu, cpu);
  
+       /* Save host pkru register if supported */
+       vcpu->arch.host_pkru = read_pkru();
        /* Apply any externally detected TSC adjustments (due to suspend) */
        if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
                adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@@ -4025,7 -4017,6 +4032,6 @@@ static int kvm_vcpu_ioctl_x86_set_debug
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
        kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = dbgregs->dr6;
-       kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = dbgregs->dr7;
        kvm_update_dr7(vcpu);
  
@@@ -4235,9 -4226,9 +4241,9 @@@ static int kvm_vcpu_ioctl_enable_cap(st
                return kvm_hv_activate_synic(vcpu, cap->cap ==
                                             KVM_CAP_HYPERV_SYNIC2);
        case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
 -              if (!kvm_x86_ops.nested_enable_evmcs)
 +              if (!kvm_x86_ops.nested_ops->enable_evmcs)
                        return -ENOTTY;
 -              r = kvm_x86_ops.nested_enable_evmcs(vcpu, &vmcs_version);
 +              r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
                if (!r) {
                        user_ptr = (void __user *)(uintptr_t)cap->args[0];
                        if (copy_to_user(user_ptr, &vmcs_version,
@@@ -4552,7 -4543,7 +4558,7 @@@ long kvm_arch_vcpu_ioctl(struct file *f
                u32 user_data_size;
  
                r = -EINVAL;
 -              if (!kvm_x86_ops.get_nested_state)
 +              if (!kvm_x86_ops.nested_ops->get_state)
                        break;
  
                BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
                if (get_user(user_data_size, &user_kvm_nested_state->size))
                        break;
  
 -              r = kvm_x86_ops.get_nested_state(vcpu, user_kvm_nested_state,
 -                                                user_data_size);
 +              r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state,
 +                                                   user_data_size);
                if (r < 0)
                        break;
  
                int idx;
  
                r = -EINVAL;
 -              if (!kvm_x86_ops.set_nested_state)
 +              if (!kvm_x86_ops.nested_ops->set_state)
                        break;
  
                r = -EFAULT;
                        break;
  
                idx = srcu_read_lock(&vcpu->kvm->srcu);
 -              r = kvm_x86_ops.set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
 +              r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state);
                srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        }
@@@ -5064,10 -5055,13 +5070,13 @@@ set_identity_unlock
                r = -EFAULT;
                if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
                        goto out;
+               mutex_lock(&kvm->lock);
                r = -ENXIO;
                if (!kvm->arch.vpit)
-                       goto out;
+                       goto set_pit_out;
                r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
+ set_pit_out:
+               mutex_unlock(&kvm->lock);
                break;
        }
        case KVM_GET_PIT2: {
                r = -EFAULT;
                if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
                        goto out;
+               mutex_lock(&kvm->lock);
                r = -ENXIO;
                if (!kvm->arch.vpit)
-                       goto out;
+                       goto set_pit2_out;
                r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
+ set_pit2_out:
+               mutex_unlock(&kvm->lock);
                break;
        }
        case KVM_REINJECT_CONTROL: {
@@@ -5854,6 -5851,7 +5866,7 @@@ static int emulator_cmpxchg_emulated(st
  {
        struct kvm_host_map map;
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+       u64 page_line_mask;
        gpa_t gpa;
        char *kaddr;
        bool exchanged;
            (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
                goto emul_write;
  
-       if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
+       /*
+        * Emulate the atomic as a straight write to avoid #AC if SLD is
+        * enabled in the host and the access splits a cache line.
+        */
+       if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+               page_line_mask = ~(cache_line_size() - 1);
+       else
+               page_line_mask = PAGE_MASK;
+       if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
                goto emul_write;
  
        if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
@@@ -6396,7 -6403,7 +6418,7 @@@ static bool inject_emulated_exception(s
  {
        struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
        if (ctxt->exception.vector == PF_VECTOR)
 -              return kvm_propagate_fault(vcpu, &ctxt->exception);
 +              return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
  
        if (ctxt->exception.error_code_valid)
                kvm_queue_exception_e(vcpu, ctxt->exception.vector,
@@@ -6659,7 -6666,7 +6681,7 @@@ static int kvm_vcpu_do_singlestep(struc
  
        if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
                kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
-               kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+               kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
                kvm_run->debug.arch.exception = DB_VECTOR;
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
                return 0;
@@@ -6719,9 -6726,7 +6741,7 @@@ static bool kvm_vcpu_check_breakpoint(s
                                           vcpu->arch.db);
  
                if (dr6 != 0) {
-                       vcpu->arch.dr6 &= ~DR_TRAP_BITS;
-                       vcpu->arch.dr6 |= dr6 | DR6_RTM;
-                       kvm_queue_exception(vcpu, DB_VECTOR);
+                       kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
                        *r = 1;
                        return true;
                }
@@@ -7699,8 -7704,8 +7719,8 @@@ static int inject_pending_event(struct 
         * from L2 to L1 due to pending L1 events which require exit
         * from L2 to L1.
         */
 -      if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
 -              r = kvm_x86_ops.check_nested_events(vcpu);
 +      if (is_guest_mode(vcpu)) {
 +              r = kvm_x86_ops.nested_ops->check_events(vcpu);
                if (r != 0)
                        return r;
        }
                 * proposal and current concerns.  Perhaps we should be setting
                 * KVM_REQ_EVENT only on certain events and not unconditionally?
                 */
 -              if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
 -                      r = kvm_x86_ops.check_nested_events(vcpu);
 +              if (is_guest_mode(vcpu)) {
 +                      r = kvm_x86_ops.nested_ops->check_events(vcpu);
                        if (r != 0)
                                return r;
                }
@@@ -8042,7 -8047,7 +8062,7 @@@ void kvm_make_scan_ioapic_request_mask(
        zalloc_cpumask_var(&cpus, GFP_ATOMIC);
  
        kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
-                                   vcpu_bitmap, cpus);
+                                   NULL, vcpu_bitmap, cpus);
  
        free_cpumask_var(cpus);
  }
@@@ -8072,6 -8077,7 +8092,7 @@@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv
   */
  void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
  {
+       struct kvm_vcpu *except;
        unsigned long old, new, expected;
  
        if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
        trace_kvm_apicv_update_request(activate, bit);
        if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
                kvm_x86_ops.pre_update_apicv_exec_ctrl(kvm, activate);
-       kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
+       /*
+        * Send a request to update APICv for all other vCPUs, and update
+        * the calling vCPU immediately instead of waiting for another
+        * #VMEXIT to handle the request.
+        */
+       except = kvm_get_running_vcpu();
+       kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE,
+                                        except);
+       if (except)
+               kvm_vcpu_update_apicv(except);
  }
  EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
  
@@@ -8153,13 -8169,24 +8184,13 @@@ int kvm_arch_mmu_notifier_invalidate_ra
  
  void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
  {
 -      struct page *page = NULL;
 -
        if (!lapic_in_kernel(vcpu))
                return;
  
        if (!kvm_x86_ops.set_apic_access_page_addr)
                return;
  
 -      page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
 -      if (is_error_page(page))
 -              return;
 -      kvm_x86_ops.set_apic_access_page_addr(vcpu, page_to_phys(page));
 -
 -      /*
 -       * Do not pin apic access page in memory, the MMU notifier
 -       * will call us again if it is migrated or swapped out.
 -       */
 -      put_page(page);
 +      kvm_x86_ops.set_apic_access_page_addr(vcpu);
  }
  
  void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
@@@ -8179,13 -8206,13 +8210,13 @@@ static int vcpu_enter_guest(struct kvm_
        bool req_int_win =
                dm_request_for_irq_injection(vcpu) &&
                kvm_cpu_accept_dm_intr(vcpu);
 -      enum exit_fastpath_completion exit_fastpath = EXIT_FASTPATH_NONE;
 +      enum exit_fastpath_completion exit_fastpath;
  
        bool req_immediate_exit = false;
  
        if (kvm_request_pending(vcpu)) {
                if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
 -                      if (unlikely(!kvm_x86_ops.get_vmcs12_pages(vcpu))) {
 +                      if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) {
                                r = 0;
                                goto out;
                        }
                        kvm_mmu_sync_roots(vcpu);
                if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
                        kvm_mmu_load_pgd(vcpu);
 -              if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
 -                      kvm_vcpu_flush_tlb(vcpu, true);
 +              if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
 +                      kvm_vcpu_flush_tlb_all(vcpu);
 +
 +                      /* Flushing all ASIDs flushes the current ASID... */
 +                      kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 +              }
 +              if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
 +                      kvm_vcpu_flush_tlb_current(vcpu);
 +              if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
 +                      kvm_vcpu_flush_tlb_guest(vcpu);
 +
                if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
                        r = 0;
                vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
        }
  
 -      kvm_x86_ops.run(vcpu);
 +      exit_fastpath = kvm_x86_ops.run(vcpu);
  
        /*
         * Do this here before restoring debug registers on the host.  And
                WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
                kvm_x86_ops.sync_dirty_debug_regs(vcpu);
                kvm_update_dr0123(vcpu);
-               kvm_update_dr6(vcpu);
                kvm_update_dr7(vcpu);
                vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
        }
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
  
 -      kvm_x86_ops.handle_exit_irqoff(vcpu, &exit_fastpath);
 +      kvm_x86_ops.handle_exit_irqoff(vcpu);
  
        /*
         * Consume any pending interrupts, including the possible source of
@@@ -8527,8 -8544,8 +8557,8 @@@ static inline int vcpu_block(struct kv
  
  static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
  {
 -      if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events)
 -              kvm_x86_ops.check_nested_events(vcpu);
 +      if (is_guest_mode(vcpu))
 +              kvm_x86_ops.nested_ops->check_events(vcpu);
  
        return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
                !vcpu->arch.apf.halted);
@@@ -8710,9 -8727,8 +8740,9 @@@ static void kvm_put_guest_fpu(struct kv
        trace_kvm_fpu(0);
  }
  
 -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
  {
 +      struct kvm_run *kvm_run = vcpu->run;
        int r;
  
        vcpu_load(vcpu);
                r = -EAGAIN;
                if (signal_pending(current)) {
                        r = -EINTR;
 -                      vcpu->run->exit_reason = KVM_EXIT_INTR;
 +                      kvm_run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.signal_exits;
                }
                goto out;
        }
  
 -      if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
 +      if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
                r = -EINVAL;
                goto out;
        }
  
 -      if (vcpu->run->kvm_dirty_regs) {
 +      if (kvm_run->kvm_dirty_regs) {
                r = sync_regs(vcpu);
                if (r != 0)
                        goto out;
  
  out:
        kvm_put_guest_fpu(vcpu);
 -      if (vcpu->run->kvm_valid_regs)
 +      if (kvm_run->kvm_valid_regs)
                store_regs(vcpu);
        post_kvm_run_save(vcpu);
        kvm_sigset_deactivate(vcpu);
@@@ -9480,7 -9496,6 +9510,6 @@@ void kvm_vcpu_reset(struct kvm_vcpu *vc
        memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
        kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = DR6_INIT;
-       kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = DR7_FIXED_1;
        kvm_update_dr7(vcpu);
  
@@@ -9662,7 -9677,9 +9691,9 @@@ int kvm_arch_hardware_setup(void *opaqu
        if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
                supported_xss = 0;
  
-       cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
+ #define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
+       cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
+ #undef __kvm_cpu_cap_has
  
        if (kvm_has_tsc_control) {
                /*
@@@ -9694,7 -9711,8 +9725,8 @@@ int kvm_arch_check_processor_compat(voi
  
        WARN_ON(!irqs_disabled());
  
-       if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits)
+       if (__cr4_reserved_bits(cpu_has, c) !=
+           __cr4_reserved_bits(cpu_has, &boot_cpu_data))
                return -EIO;
  
        return ops->check_processor_compatibility();
diff --combined include/linux/kvm_host.h
index 3cc6ccbb118397d120b063611a344106cbb0868b,131cc1527d689a8ee3acf2da61def9a4a900d7a9..abfa71cb5d2d0f512aca74a75d1873c27d094a9b
@@@ -503,7 -503,6 +503,7 @@@ struct kvm 
        struct srcu_struct srcu;
        struct srcu_struct irq_srcu;
        pid_t userspace_pid;
 +      unsigned int max_halt_poll_ns;
  };
  
  #define kvm_err(fmt, ...) \
@@@ -814,8 -813,11 +814,11 @@@ void kvm_flush_remote_tlbs(struct kvm *
  void kvm_reload_remote_mmus(struct kvm *kvm);
  
  bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
+                                struct kvm_vcpu *except,
                                 unsigned long *vcpu_bitmap, cpumask_var_t tmp);
  bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
+ bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
+                                     struct kvm_vcpu *except);
  bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
                                unsigned long *vcpu_bitmap);
  
@@@ -867,7 -869,7 +870,7 @@@ int kvm_arch_vcpu_ioctl_set_mpstate(str
                                    struct kvm_mp_state *mp_state);
  int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg);
 -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
 +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
  
  int kvm_arch_init(void *opaque);
  void kvm_arch_exit(void);
@@@ -1131,11 -1133,6 +1134,11 @@@ struct kvm_stats_debugfs_item 
  #define KVM_DBGFS_GET_MODE(dbgfs_item)                                         \
        ((dbgfs_item)->mode ? (dbgfs_item)->mode : 0644)
  
 +#define VM_STAT(n, x, ...)                                                    \
 +      { n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ }
 +#define VCPU_STAT(n, x, ...)                                                  \
 +      { n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ }
 +
  extern struct kvm_stats_debugfs_item debugfs_entries[];
  extern struct dentry *kvm_debugfs_dir;
  
@@@ -1358,12 -1355,6 +1361,12 @@@ static inline void kvm_vcpu_set_dy_elig
  }
  #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
  
 +static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot)
 +{
 +      return (memslot && memslot->id < KVM_USER_MEM_SLOTS &&
 +              !(memslot->flags & KVM_MEMSLOT_INVALID));
 +}
 +
  struct kvm_vcpu *kvm_get_running_vcpu(void);
  struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
  
diff --combined tools/testing/selftests/kvm/.gitignore
index 5947cc119abccfbeb27cfb41c8814f93182eb46f,a9b2b48947ffc6ff1fbf25b37ad5113b1840d0aa..222e50104296a65f36dcc5ca4d4665a6fe9c2ff5
@@@ -1,3 -1,4 +1,4 @@@
+ # SPDX-License-Identifier: GPL-2.0-only
  /s390x/memop
  /s390x/resets
  /s390x/sync_regs_test
@@@ -6,6 -7,7 +7,6 @@@
  /x86_64/hyperv_cpuid
  /x86_64/mmio_warning_test
  /x86_64/platform_info_test
 -/x86_64/set_memory_region_test
  /x86_64/set_sregs_test
  /x86_64/smm_test
  /x86_64/state_test
@@@ -20,5 -22,4 +21,5 @@@
  /demand_paging_test
  /dirty_log_test
  /kvm_create_max_vcpus
 +/set_memory_region_test
  /steal_time
diff --combined tools/testing/selftests/kvm/Makefile
index 7af62030c12f6de366fc6557fce2fd751714e176,44b6ef51316476342bd42be5421696afd7eed1e8..c66f4eec34111c16361e4a5cf174bf386732d386
@@@ -17,6 -17,7 +17,6 @@@ TEST_GEN_PROGS_x86_64 += x86_64/evmcs_t
  TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
  TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
  TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
 -TEST_GEN_PROGS_x86_64 += x86_64/set_memory_region_test
  TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
  TEST_GEN_PROGS_x86_64 += x86_64/smm_test
  TEST_GEN_PROGS_x86_64 += x86_64/state_test
@@@ -27,18 -28,17 +27,19 @@@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_dir
  TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
  TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
  TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
+ TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
  TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
  TEST_GEN_PROGS_x86_64 += demand_paging_test
  TEST_GEN_PROGS_x86_64 += dirty_log_test
  TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 +TEST_GEN_PROGS_x86_64 += set_memory_region_test
  TEST_GEN_PROGS_x86_64 += steal_time
  
  TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
  TEST_GEN_PROGS_aarch64 += demand_paging_test
  TEST_GEN_PROGS_aarch64 += dirty_log_test
  TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
 +TEST_GEN_PROGS_aarch64 += set_memory_region_test
  TEST_GEN_PROGS_aarch64 += steal_time
  
  TEST_GEN_PROGS_s390x = s390x/memop
@@@ -47,7 -47,6 +48,7 @@@ TEST_GEN_PROGS_s390x += s390x/sync_regs
  TEST_GEN_PROGS_s390x += demand_paging_test
  TEST_GEN_PROGS_s390x += dirty_log_test
  TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
 +TEST_GEN_PROGS_s390x += set_memory_region_test
  
  TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
  LIBKVM += $(LIBKVM_$(UNAME_M))
diff --combined tools/testing/selftests/kvm/include/kvm_util.h
index 53b11d725d81d9a22645f47a7dc832269cbe9299,92e184a422eedf97115ad3ab96460d307a16fa58..e244c6ecfc1d50e37c8e5277cee042a5cef225b8
@@@ -10,7 -10,6 +10,7 @@@
  #include "test_util.h"
  
  #include "asm/kvm.h"
 +#include "linux/list.h"
  #include "linux/kvm.h"
  #include <sys/ioctl.h>
  
@@@ -114,7 -113,6 +114,7 @@@ int _vcpu_ioctl(struct kvm_vm *vm, uint
  void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
  void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
  void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 +void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
  void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
  vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
                          uint32_t data_memslot, uint32_t pgd_memslot);
@@@ -145,6 -143,8 +145,8 @@@ struct kvm_run *vcpu_state(struct kvm_v
  void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
  int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
  void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
+ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
+                         struct kvm_guest_debug *debug);
  void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
                       struct kvm_mp_state *mp_state);
  void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
@@@ -256,7 -256,6 +258,7 @@@ bool vm_is_unrestricted_guest(struct kv
  unsigned int vm_get_page_size(struct kvm_vm *vm);
  unsigned int vm_get_page_shift(struct kvm_vm *vm);
  unsigned int vm_get_max_gfn(struct kvm_vm *vm);
 +int vm_get_fd(struct kvm_vm *vm);
  
  unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
  unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
@@@ -314,26 -313,11 +316,26 @@@ uint64_t get_ucall(struct kvm_vm *vm, u
  
  #define GUEST_SYNC(stage)     ucall(UCALL_SYNC, 2, "hello", stage)
  #define GUEST_DONE()          ucall(UCALL_DONE, 0)
 -#define GUEST_ASSERT(_condition) do {                 \
 -      if (!(_condition))                              \
 -              ucall(UCALL_ABORT, 2,                 \
 -                      "Failed guest assert: "         \
 -                      #_condition, __LINE__);         \
 +#define __GUEST_ASSERT(_condition, _nargs, _args...) do {     \
 +      if (!(_condition))                                      \
 +              ucall(UCALL_ABORT, 2 + _nargs,                  \
 +                      "Failed guest assert: "                 \
 +                      #_condition, __LINE__, _args);          \
  } while (0)
  
 +#define GUEST_ASSERT(_condition) \
 +      __GUEST_ASSERT((_condition), 0, 0)
 +
 +#define GUEST_ASSERT_1(_condition, arg1) \
 +      __GUEST_ASSERT((_condition), 1, (arg1))
 +
 +#define GUEST_ASSERT_2(_condition, arg1, arg2) \
 +      __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
 +
 +#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
 +      __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
 +
 +#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
 +      __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
 +
  #endif /* SELFTEST_KVM_UTIL_H */
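
A minimal guest-side sketch (not taken from this merge) of how the GUEST_ASSERT_2() flavor declared above might be used; it assumes the usual selftest kvm_util.h/ucall plumbing and a guest_code() entry point, and the values are invented for illustration:

	#include <stdint.h>
	#include "kvm_util.h"

	static void guest_code(void)
	{
		uint64_t val = 41, want = 42;

		/* On failure, the two extra args are carried back to the host via UCALL_ABORT. */
		GUEST_ASSERT_2(val + 1 == want, val, want);
		GUEST_DONE();
	}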
diff --combined tools/testing/selftests/kvm/lib/kvm_util.c
index 33ab0a36d23069a82a5c7fd1f8ca785ded63a48d,9622431069bc4597de3e4ab55e90baa4ec7e7b65..c9cede5c7d0de63480cf62bbbdeaeca7d083ed88
@@@ -161,9 -161,6 +161,9 @@@ struct kvm_vm *_vm_create(enum vm_guest
        vm = calloc(1, sizeof(*vm));
        TEST_ASSERT(vm != NULL, "Insufficient Memory");
  
 +      INIT_LIST_HEAD(&vm->vcpus);
 +      INIT_LIST_HEAD(&vm->userspace_mem_regions);
 +
        vm->mode = mode;
        vm->type = 0;
  
@@@ -261,7 -258,8 +261,7 @@@ void kvm_vm_restart(struct kvm_vm *vmp
        if (vmp->has_irqchip)
                vm_create_irqchip(vmp);
  
 -      for (region = vmp->userspace_mem_region_head; region;
 -              region = region->next) {
 +      list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
                int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
                TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
                            "  rc: %i errno: %i\n"
@@@ -321,7 -319,8 +321,7 @@@ userspace_mem_region_find(struct kvm_v
  {
        struct userspace_mem_region *region;
  
 -      for (region = vm->userspace_mem_region_head; region;
 -              region = region->next) {
 +      list_for_each_entry(region, &vm->userspace_mem_regions, list) {
                uint64_t existing_start = region->region.guest_phys_addr;
                uint64_t existing_end = region->region.guest_phys_addr
                        + region->region.memory_size - 1;
@@@ -379,11 -378,11 +379,11 @@@ kvm_userspace_memory_region_find(struc
   */
  struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
  {
 -      struct vcpu *vcpup;
 +      struct vcpu *vcpu;
  
 -      for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
 -              if (vcpup->id == vcpuid)
 -                      return vcpup;
 +      list_for_each_entry(vcpu, &vm->vcpus, list) {
 +              if (vcpu->id == vcpuid)
 +                      return vcpu;
        }
  
        return NULL;
   * VM VCPU Remove
   *
   * Input Args:
 - *   vm - Virtual Machine
 - *   vcpuid - VCPU ID
 + *   vcpu - VCPU to remove
   *
   * Output Args: None
   *
   * Return: None, TEST_ASSERT failures for all error conditions
   *
 - * Within the VM specified by vm, removes the VCPU given by vcpuid.
 + * Removes a vCPU from a VM and frees its resources.
   */
 -static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
 +static void vm_vcpu_rm(struct vcpu *vcpu)
  {
 -      struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        int ret;
  
        ret = munmap(vcpu->state, sizeof(*vcpu->state));
        TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
                "errno: %i", ret, errno);
  
 -      if (vcpu->next)
 -              vcpu->next->prev = vcpu->prev;
 -      if (vcpu->prev)
 -              vcpu->prev->next = vcpu->next;
 -      else
 -              vm->vcpu_head = vcpu->next;
 +      list_del(&vcpu->list);
        free(vcpu);
  }
  
  void kvm_vm_release(struct kvm_vm *vmp)
  {
 +      struct vcpu *vcpu, *tmp;
        int ret;
  
 -      while (vmp->vcpu_head)
 -              vm_vcpu_rm(vmp, vmp->vcpu_head->id);
 +      list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
 +              vm_vcpu_rm(vcpu);
  
        ret = close(vmp->fd);
        TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
                "  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
  }
  
 +static void __vm_mem_region_delete(struct kvm_vm *vm,
 +                                 struct userspace_mem_region *region)
 +{
 +      int ret;
 +
 +      list_del(&region->list);
 +
 +      region->region.memory_size = 0;
 +      ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
 +      TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
 +                  "rc: %i errno: %i", ret, errno);
 +
 +      sparsebit_free(&region->unused_phy_pages);
 +      ret = munmap(region->mmap_start, region->mmap_size);
 +      TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
 +
 +      free(region);
 +}
 +
  /*
   * Destroys and frees the VM pointed to by vmp.
   */
  void kvm_vm_free(struct kvm_vm *vmp)
  {
 -      int ret;
 +      struct userspace_mem_region *region, *tmp;
  
        if (vmp == NULL)
                return;
  
        /* Free userspace_mem_regions. */
 -      while (vmp->userspace_mem_region_head) {
 -              struct userspace_mem_region *region
 -                      = vmp->userspace_mem_region_head;
 -
 -              region->region.memory_size = 0;
 -              ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
 -                      &region->region);
 -              TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
 -                      "rc: %i errno: %i", ret, errno);
 -
 -              vmp->userspace_mem_region_head = region->next;
 -              sparsebit_free(&region->unused_phy_pages);
 -              ret = munmap(region->mmap_start, region->mmap_size);
 -              TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
 -                          ret, errno);
 -
 -              free(region);
 -      }
 +      list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
 +              __vm_mem_region_delete(vmp, region);
  
        /* Free sparsebit arrays. */
        sparsebit_free(&vmp->vpages_valid);
@@@ -610,10 -612,12 +610,10 @@@ void vm_userspace_mem_region_add(struc
                        (uint64_t) region->region.memory_size);
  
        /* Confirm no region with the requested slot already exists. */
 -      for (region = vm->userspace_mem_region_head; region;
 -              region = region->next) {
 -              if (region->region.slot == slot)
 -                      break;
 -      }
 -      if (region != NULL)
 +      list_for_each_entry(region, &vm->userspace_mem_regions, list) {
 +              if (region->region.slot != slot)
 +                      continue;
 +
                TEST_FAIL("A mem region with the requested slot "
                        "already exists.\n"
                        "  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
                        region->region.slot,
                        (uint64_t) region->region.guest_phys_addr,
                        (uint64_t) region->region.memory_size);
 +      }
  
        /* Allocate and initialize new mem region structure. */
        region = calloc(1, sizeof(*region));
                guest_paddr, (uint64_t) region->region.memory_size);
  
        /* Add to linked-list of memory regions. */
 -      if (vm->userspace_mem_region_head)
 -              vm->userspace_mem_region_head->prev = region;
 -      region->next = vm->userspace_mem_region_head;
 -      vm->userspace_mem_region_head = region;
 +      list_add(&region->list, &vm->userspace_mem_regions);
  }
  
  /*
@@@ -706,17 -712,20 +706,17 @@@ memslot2region(struct kvm_vm *vm, uint3
  {
        struct userspace_mem_region *region;
  
 -      for (region = vm->userspace_mem_region_head; region;
 -              region = region->next) {
 +      list_for_each_entry(region, &vm->userspace_mem_regions, list) {
                if (region->region.slot == memslot)
 -                      break;
 -      }
 -      if (region == NULL) {
 -              fprintf(stderr, "No mem region with the requested slot found,\n"
 -                      "  requested slot: %u\n", memslot);
 -              fputs("---- vm dump ----\n", stderr);
 -              vm_dump(stderr, vm, 2);
 -              TEST_FAIL("Mem region not found");
 +                      return region;
        }
  
 -      return region;
 +      fprintf(stderr, "No mem region with the requested slot found,\n"
 +              "  requested slot: %u\n", memslot);
 +      fputs("---- vm dump ----\n", stderr);
 +      vm_dump(stderr, vm, 2);
 +      TEST_FAIL("Mem region not found");
 +      return NULL;
  }
  
  /*
@@@ -779,24 -788,6 +779,24 @@@ void vm_mem_region_move(struct kvm_vm *
                    ret, errno, slot, new_gpa);
  }
  
 +/*
 + * VM Memory Region Delete
 + *
 + * Input Args:
 + *   vm - Virtual Machine
 + *   slot - Slot of the memory region to delete
 + *
 + * Output Args: None
 + *
 + * Return: None
 + *
 + * Delete a memory region.
 + */
 +void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
 +{
 +      __vm_mem_region_delete(vm, memslot2region(vm, slot));
 +}
 +
  /*
   * VCPU mmap Size
   *
@@@ -872,7 -863,10 +872,7 @@@ void vm_vcpu_add(struct kvm_vm *vm, uin
                "vcpu id: %u errno: %i", vcpuid, errno);
  
        /* Add to linked-list of VCPUs. */
 -      if (vm->vcpu_head)
 -              vm->vcpu_head->prev = vcpu;
 -      vcpu->next = vm->vcpu_head;
 -      vm->vcpu_head = vcpu;
 +      list_add(&vcpu->list, &vm->vcpus);
  }
  
  /*
@@@ -1065,8 -1059,8 +1065,8 @@@ void virt_map(struct kvm_vm *vm, uint64
  void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
  {
        struct userspace_mem_region *region;
 -      for (region = vm->userspace_mem_region_head; region;
 -           region = region->next) {
 +
 +      list_for_each_entry(region, &vm->userspace_mem_regions, list) {
                if ((gpa >= region->region.guest_phys_addr)
                        && (gpa <= (region->region.guest_phys_addr
                                + region->region.memory_size - 1)))
  vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
  {
        struct userspace_mem_region *region;
 -      for (region = vm->userspace_mem_region_head; region;
 -           region = region->next) {
 +
 +      list_for_each_entry(region, &vm->userspace_mem_regions, list) {
                if ((hva >= region->host_mem)
                        && (hva <= (region->host_mem
                                + region->region.memory_size - 1)))
@@@ -1207,6 -1201,15 +1207,15 @@@ void vcpu_run_complete_io(struct kvm_v
                    ret, errno);
  }
  
+ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
+                         struct kvm_guest_debug *debug)
+ {
+       struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+       int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
+       TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
+ }
  /*
   * VM VCPU Set MP State
   *
@@@ -1526,7 -1529,8 +1535,7 @@@ void vm_dump(FILE *stream, struct kvm_v
        fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
        fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
        fprintf(stream, "%*sMem Regions:\n", indent, "");
 -      for (region = vm->userspace_mem_region_head; region;
 -              region = region->next) {
 +      list_for_each_entry(region, &vm->userspace_mem_regions, list) {
                fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
                        "host_virt: %p\n", indent + 2, "",
                        (uint64_t) region->region.guest_phys_addr,
                virt_dump(stream, vm, indent + 4);
        }
        fprintf(stream, "%*sVCPUs:\n", indent, "");
 -      for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
 +      list_for_each_entry(vcpu, &vm->vcpus, list)
                vcpu_dump(stream, vm, vcpu->id, indent + 2);
  }
  
@@@ -1739,11 -1743,6 +1748,11 @@@ unsigned int vm_get_max_gfn(struct kvm_
        return vm->max_gfn;
  }
  
 +int vm_get_fd(struct kvm_vm *vm)
 +{
 +      return vm->fd;
 +}
 +
  static unsigned int vm_calc_num_pages(unsigned int num_pages,
                                      unsigned int page_shift,
                                      unsigned int new_page_shift,
diff --combined virt/kvm/kvm_main.c
index 33e1eee96f75c04601481bba80a5fd4e2b1332b3,731c1e517716f8f26c736bcdb3527fdb1a7743ff..7525f3838160229119d51d1b872d37c1700c4655
@@@ -259,6 -259,7 +259,7 @@@ static inline bool kvm_kick_many_cpus(c
  }
  
  bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
+                                struct kvm_vcpu *except,
                                 unsigned long *vcpu_bitmap, cpumask_var_t tmp)
  {
        int i, cpu, me;
        me = get_cpu();
  
        kvm_for_each_vcpu(i, vcpu, kvm) {
-               if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
+               if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) ||
+                   vcpu == except)
                        continue;
  
                kvm_make_request(req, vcpu);
        return called;
  }
  
- bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
+ bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
+                                     struct kvm_vcpu *except)
  {
        cpumask_var_t cpus;
        bool called;
  
        zalloc_cpumask_var(&cpus, GFP_ATOMIC);
  
-       called = kvm_make_vcpus_request_mask(kvm, req, NULL, cpus);
+       called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus);
  
        free_cpumask_var(cpus);
        return called;
  }
  
+ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
+ {
+       return kvm_make_all_cpus_request_except(kvm, req, NULL);
+ }
  #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
  void kvm_flush_remote_tlbs(struct kvm *kvm)
  {
@@@ -710,8 -718,6 +718,8 @@@ static struct kvm *kvm_create_vm(unsign
                        goto out_err_no_arch_destroy_vm;
        }
  
 +      kvm->max_halt_poll_ns = halt_poll_ns;
 +
        r = kvm_arch_init_vm(kvm, type);
        if (r)
                goto out_err_no_arch_destroy_vm;
@@@ -1604,13 -1610,16 +1612,13 @@@ struct kvm_memory_slot *kvm_vcpu_gfn_to
  {
        return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
  }
 +EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
  
  bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
  {
        struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
  
 -      if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS ||
 -            memslot->flags & KVM_MEMSLOT_INVALID)
 -              return false;
 -
 -      return true;
 +      return kvm_is_visible_memslot(memslot);
  }
  EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
  
        if (!kvm_arch_no_poll(vcpu)) {
                if (!vcpu_valid_wakeup(vcpu)) {
                        shrink_halt_poll_ns(vcpu);
 -              } else if (halt_poll_ns) {
 +              } else if (vcpu->kvm->max_halt_poll_ns) {
                        if (block_ns <= vcpu->halt_poll_ns)
                                ;
                        /* we had a long block, shrink polling */
 -                      else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
 +                      else if (vcpu->halt_poll_ns &&
 +                                      block_ns > vcpu->kvm->max_halt_poll_ns)
                                shrink_halt_poll_ns(vcpu);
                        /* we had a short halt and our poll time is too small */
 -                      else if (vcpu->halt_poll_ns < halt_poll_ns &&
 -                              block_ns < halt_poll_ns)
 +                      else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
 +                                      block_ns < vcpu->kvm->max_halt_poll_ns)
                                grow_halt_poll_ns(vcpu);
                } else {
                        vcpu->halt_poll_ns = 0;
@@@ -3031,6 -3039,8 +3039,6 @@@ static int kvm_vm_ioctl_create_vcpu(str
        if (r)
                goto vcpu_free_run_page;
  
 -      kvm_create_vcpu_debugfs(vcpu);
 -
        mutex_lock(&kvm->lock);
        if (kvm_get_vcpu_by_id(kvm, id)) {
                r = -EEXIST;
  
        mutex_unlock(&kvm->lock);
        kvm_arch_vcpu_postcreate(vcpu);
 +      kvm_create_vcpu_debugfs(vcpu);
        return r;
  
  unlock_vcpu_destroy:
        mutex_unlock(&kvm->lock);
 -      debugfs_remove_recursive(vcpu->debugfs_dentry);
        kvm_arch_vcpu_destroy(vcpu);
  vcpu_free_run_page:
        free_page((unsigned long)vcpu->run);
@@@ -3133,7 -3143,7 +3141,7 @@@ static long kvm_vcpu_ioctl(struct file 
                                synchronize_rcu();
                        put_pid(oldpid);
                }
 -              r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
 +              r = kvm_arch_vcpu_ioctl_run(vcpu);
                trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
                break;
        }
@@@ -3158,6 -3168,7 +3166,6 @@@ out_free1
        case KVM_SET_REGS: {
                struct kvm_regs *kvm_regs;
  
 -              r = -ENOMEM;
                kvm_regs = memdup_user(argp, sizeof(*kvm_regs));
                if (IS_ERR(kvm_regs)) {
                        r = PTR_ERR(kvm_regs);
@@@ -3513,7 -3524,6 +3521,7 @@@ static long kvm_vm_ioctl_check_extensio
        case KVM_CAP_IOEVENTFD_ANY_LENGTH:
        case KVM_CAP_CHECK_EXTENSION_VM:
        case KVM_CAP_ENABLE_CAP_VM:
 +      case KVM_CAP_HALT_POLL:
                return 1;
  #ifdef CONFIG_KVM_MMIO
        case KVM_CAP_COALESCED_MMIO:
@@@ -3564,13 -3574,6 +3572,13 @@@ static int kvm_vm_ioctl_enable_cap_gene
                return 0;
        }
  #endif
 +      case KVM_CAP_HALT_POLL: {
 +              if (cap->flags || cap->args[0] != (unsigned int)cap->args[0])
 +                      return -EINVAL;
 +
 +              kvm->max_halt_poll_ns = cap->args[0];
 +              return 0;
 +      }
        default:
                return kvm_vm_ioctl_enable_cap(kvm, cap);
        }
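
The per-VM KVM_CAP_HALT_POLL handling above takes the new ceiling from cap->args[0] and rejects non-zero flags. As a hedged sketch (not part of this merge), a VMM that already holds a VM file descriptor could set the limit roughly like this; the helper name is invented for the example:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Cap halt-polling for this VM at 'ns' nanoseconds; 0 disables polling. */
	static int set_max_halt_poll_ns(int vm_fd, unsigned int ns)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_HALT_POLL,
			.args[0] = ns,		/* stored as kvm->max_halt_poll_ns */
		};

		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}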