Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author	Linus Torvalds <torvalds@linux-foundation.org>
Wed, 18 Sep 2019 16:49:13 +0000 (09:49 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Wed, 18 Sep 2019 16:49:13 +0000 (09:49 -0700)
Pull KVM updates from Paolo Bonzini:
 "s390:
   - ioctl hardening
   - selftests

  ARM:
   - ITS translation cache
   - support for 512 vCPUs
   - various cleanups and bugfixes

  PPC:
   - various minor fixes and preparation

  x86:
   - bugfixes all over the place (posted interrupts, SVM, emulation
     corner cases, blocked INIT)
   - some IPI optimizations"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (75 commits)
  KVM: X86: Use IPI shorthands in kvm guest when support
  KVM: x86: Fix INIT signal handling in various CPU states
  KVM: VMX: Introduce exit reason for receiving INIT signal on guest-mode
  KVM: VMX: Stop the preemption timer during vCPU reset
  KVM: LAPIC: Micro optimize IPI latency
  kvm: Nested KVM MMUs need PAE root too
  KVM: x86: set ctxt->have_exception in x86_decode_insn()
  KVM: x86: always stop emulation on page fault
  KVM: nVMX: trace nested VM-Enter failures detected by H/W
  KVM: nVMX: add tracepoint for failed nested VM-Enter
  x86: KVM: svm: Fix a check in nested_svm_vmrun()
  KVM: x86: Return to userspace with internal error on unexpected exit reason
  KVM: x86: Add kvm_emulate_{rd,wr}msr() to consolidate VXM/SVM code
  KVM: x86: Refactor up kvm_{g,s}et_msr() to simplify callers
  doc: kvm: Fix return description of KVM_SET_MSRS
  KVM: X86: Tune PLE Window tracepoint
  KVM: VMX: Change ple_window type to unsigned int
  KVM: X86: Remove tailing newline for tracepoints
  KVM: X86: Trace vcpu_id for vmexit
  KVM: x86: Manually calculate reserved bits when loading PDPTRS
  ...
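
One userspace-visible change in this list is "KVM: x86: Return to userspace with internal error on unexpected exit reason": instead of injecting #UD, SVM's handle_exit() now bails out to the VMM with KVM_EXIT_INTERNAL_ERROR and the new KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON suberror (see the svm.c hunk further down). A minimal VMM-side sketch of consuming that exit could look like the following; demo_handle_run_exit is a hypothetical helper, not part of this series:

	#include <linux/kvm.h>
	#include <stdio.h>

	/* Hypothetical helper: inspect the shared kvm_run area after KVM_RUN returns. */
	static int demo_handle_run_exit(struct kvm_run *run)
	{
		if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR &&
		    run->internal.suberror == KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON) {
			/* data[0] carries the raw hardware exit code for diagnostics */
			fprintf(stderr, "unexpected vmexit: 0x%llx\n",
				(unsigned long long)run->internal.data[0]);
			return -1;
		}
		return 0;
	}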

13 files changed:
arch/s390/kvm/kvm-s390.c
arch/x86/include/asm/kvm_host.h
arch/x86/kernel/kvm.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
virt/kvm/arm/vgic/vgic-init.c
virt/kvm/arm/vgic/vgic-v2.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/arm/vgic/vgic.c

diff --combined arch/s390/kvm/kvm-s390.c
index 39cff07bf2ebef248e2f9630517e4be31ab3530c,a7d7dedfe527e48694e04e76d4875fe88bb3df05..f6db0f1bc86749ab5d513a153e3abbe341408db7
@@@ -1018,8 -1018,6 +1018,8 @@@ static int kvm_s390_vm_start_migration(
        /* mark all the pages in active slots as dirty */
        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                ms = slots->memslots + slotnr;
 +              if (!ms->dirty_bitmap)
 +                      return -EINVAL;
                /*
                 * The second half of the bitmap is only used on x86,
                 * and would be wasted otherwise, so we put it to good
@@@ -4000,6 -3998,10 +4000,10 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
        if (kvm_run->immediate_exit)
                return -EINTR;
  
+       if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
+           kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
+               return -EINVAL;
        vcpu_load(vcpu);
  
        if (guestdbg_exit_pending(vcpu)) {
@@@ -4257,7 -4259,7 +4261,7 @@@ static long kvm_s390_guest_mem_op(struc
        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
                                    | KVM_S390_MEMOP_F_CHECK_ONLY;
  
-       if (mop->flags & ~supported_flags)
+       if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
                return -EINVAL;
  
        if (mop->size > MEM_OP_MAX_SIZE)
@@@ -4325,7 -4327,7 +4329,7 @@@ long kvm_arch_vcpu_async_ioctl(struct f
        }
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;
 -              struct kvm_s390_irq s390irq;
 +              struct kvm_s390_irq s390irq = {};
  
                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        return -EFAULT;
diff --combined arch/x86/include/asm/kvm_host.h
index bdc16b0aa7c6f8846a0c6864f3d098592542892c,b523949a8df84014967a5023ce26501d6fa4635d..a3a3ec73fa2f858073bf0eeadb7b89574093bc69
@@@ -335,7 -335,6 +335,7 @@@ struct kvm_mmu_page 
        int root_count;          /* Currently serving as active root */
        unsigned int unsync_children;
        struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
 +      unsigned long mmu_valid_gen;
        DECLARE_BITMAP(unsync_child_bitmap, 512);
  
  #ifdef CONFIG_X86_32
@@@ -718,7 -717,7 +718,7 @@@ struct kvm_vcpu_arch 
  
        /* Cache MMIO info */
        u64 mmio_gva;
-       unsigned access;
+       unsigned mmio_access;
        gfn_t mmio_gfn;
        u64 mmio_gen;
  
@@@ -857,7 -856,6 +857,7 @@@ struct kvm_arch 
        unsigned long n_requested_mmu_pages;
        unsigned long n_max_mmu_pages;
        unsigned int indirect_shadow_pages;
 +      unsigned long mmu_valid_gen;
        struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
        /*
         * Hash table of struct kvm_mmu_page.
@@@ -1072,7 -1070,7 +1072,7 @@@ struct kvm_x86_ops 
  
        void (*run)(struct kvm_vcpu *vcpu);
        int (*handle_exit)(struct kvm_vcpu *vcpu);
-       void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+       int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
        void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
        u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
        void (*patch_hypercall)(struct kvm_vcpu *vcpu,
        uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
  
        bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
+       bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
  };
  
  struct kvm_arch_async_pf {
@@@ -1328,8 -1328,10 +1330,10 @@@ int kvm_emulate_instruction_from_buffer
  
  void kvm_enable_efer_bits(u64);
  bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
- int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
- int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
+ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
+ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
+ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
  
  struct x86_emulate_ctxt;
  
@@@ -1583,6 -1585,13 +1587,13 @@@ bool kvm_intr_is_single_vcpu(struct kv
  void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
                     struct kvm_lapic_irq *irq);
  
+ static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
+ {
+       /* We can only post Fixed and LowPrio IRQs */
+       return (irq->delivery_mode == dest_Fixed ||
+               irq->delivery_mode == dest_LowestPrio);
+ }
  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
  {
        if (kvm_x86_ops->vcpu_blocking)
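
The kvm_{g,s}et_msr() refactor in the hunk above drops struct msr_data from the prototypes in favor of a bare index and value; the nested VMX MSR-switch code later in this diff is the main caller that gets simpler. A minimal sketch of the new calling convention, assuming kernel context (demo_msr_roundtrip is a hypothetical illustration, not part of the series):

	/* Hypothetical illustration of the reworked kvm_get_msr()/kvm_set_msr(). */
	static int demo_msr_roundtrip(struct kvm_vcpu *vcpu)
	{
		u64 data;

		/* a non-zero return means the guest MSR access faulted */
		if (kvm_get_msr(vcpu, MSR_IA32_TSC, &data))
			return 1;
		return kvm_set_msr(vcpu, MSR_IA32_TSC, data);
	}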
diff --combined arch/x86/kernel/kvm.c
index b2f56602af65baa27e6d7788ed14f5acc589b229,31b68db02e8d0c6a7e7a8edcb50ea09853b10ddb..e820568ed4d5c701cc4ef76886df84212ddce57c
@@@ -311,7 -311,7 +311,7 @@@ static void kvm_guest_cpu_init(void
        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
                u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
  
 -#ifdef CONFIG_PREEMPT
 +#ifdef CONFIG_PREEMPTION
                pa |= KVM_ASYNC_PF_SEND_ALWAYS;
  #endif
                pa |= KVM_ASYNC_PF_ENABLED;
@@@ -502,16 -502,6 +502,6 @@@ static void kvm_send_ipi_mask_allbutsel
        __send_ipi_mask(local_mask, vector);
  }
  
- static void kvm_send_ipi_allbutself(int vector)
- {
-       kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
- }
- static void kvm_send_ipi_all(int vector)
- {
-       __send_ipi_mask(cpu_online_mask, vector);
- }
  /*
   * Set the IPI entry points
   */
@@@ -519,8 -509,6 +509,6 @@@ static void kvm_setup_pv_ipi(void
  {
        apic->send_IPI_mask = kvm_send_ipi_mask;
        apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
-       apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
-       apic->send_IPI_all = kvm_send_ipi_all;
        pr_info("KVM setup pv IPIs\n");
  }
  
@@@ -705,7 -693,6 +693,7 @@@ unsigned int kvm_arch_para_hints(void
  {
        return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
  }
 +EXPORT_SYMBOL_GPL(kvm_arch_para_hints);
  
  static uint32_t __init kvm_detect(void)
  {
@@@ -868,39 -855,3 +856,39 @@@ void __init kvm_spinlock_init(void
  }
  
  #endif        /* CONFIG_PARAVIRT_SPINLOCKS */
 +
 +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
 +
 +static void kvm_disable_host_haltpoll(void *i)
 +{
 +      wrmsrl(MSR_KVM_POLL_CONTROL, 0);
 +}
 +
 +static void kvm_enable_host_haltpoll(void *i)
 +{
 +      wrmsrl(MSR_KVM_POLL_CONTROL, 1);
 +}
 +
 +void arch_haltpoll_enable(unsigned int cpu)
 +{
 +      if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
 +              pr_err_once("kvm: host does not support poll control\n");
 +              pr_err_once("kvm: host upgrade recommended\n");
 +              return;
 +      }
 +
 +      /* Enable guest halt poll disables host halt poll */
 +      smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
 +}
 +EXPORT_SYMBOL_GPL(arch_haltpoll_enable);
 +
 +void arch_haltpoll_disable(unsigned int cpu)
 +{
 +      if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
 +              return;
 +
 +      /* Enable guest halt poll disables host halt poll */
 +      smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
 +}
 +EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
 +#endif
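
The arch_haltpoll_enable()/arch_haltpoll_disable() exports added above are intended to be driven per CPU by guest-side halt-poll code outside this diff; writing MSR_KVM_POLL_CONTROL=0 tells the host to stop halt-polling on behalf of that vCPU. A rough consumer sketch, assuming kernel context (demo_toggle_haltpoll is hypothetical, not taken from this series):

	/* Hypothetical consumer: toggle host halt polling for every online CPU. */
	static void demo_toggle_haltpoll(bool guest_polls)
	{
		unsigned int cpu;

		for_each_online_cpu(cpu) {
			if (guest_polls)
				arch_haltpoll_enable(cpu);	/* host stops halt-polling this CPU */
			else
				arch_haltpoll_disable(cpu);	/* restore host halt polling */
		}
	}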
diff --combined arch/x86/kvm/lapic.c
index 2a4f278f3b56842ecdedc1573f5f9cb796fb56fc,dbbe4781fbb26dad1c3cc73e07bf4ed7226185dc..8675458c220598baa89515aa70dc2eb572e3d247
@@@ -1198,10 -1198,8 +1198,8 @@@ void kvm_apic_set_eoi_accelerated(struc
  }
  EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
  
- static void apic_send_ipi(struct kvm_lapic *apic)
+ static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
  {
-       u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
-       u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
        struct kvm_lapic_irq irq;
  
        irq.vector = icr_low & APIC_VECTOR_MASK;
@@@ -1598,7 -1596,7 +1596,7 @@@ static void start_sw_tscdeadline(struc
            likely(ns > apic->lapic_timer.timer_advance_ns)) {
                expire = ktime_add_ns(now, ns);
                expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
 -              hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
 +              hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
        } else
                apic_timer_expired(apic);
  
@@@ -1914,8 -1912,9 +1912,9 @@@ int kvm_lapic_reg_write(struct kvm_lapi
        }
        case APIC_ICR:
                /* No delay here, so we always clear the pending bit */
-               kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
-               apic_send_ipi(apic);
+               val &= ~(1 << 12);
+               apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
+               kvm_lapic_set_reg(apic, APIC_ICR, val);
                break;
  
        case APIC_ICR2:
@@@ -2299,7 -2298,7 +2298,7 @@@ int kvm_create_lapic(struct kvm_vcpu *v
        apic->vcpu = vcpu;
  
        hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
 -                   HRTIMER_MODE_ABS);
 +                   HRTIMER_MODE_ABS_HARD);
        apic->lapic_timer.timer.function = apic_timer_fn;
        if (timer_advance_ns == -1) {
                apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
@@@ -2484,7 -2483,7 +2483,7 @@@ void __kvm_migrate_apic_timer(struct kv
  
        timer = &vcpu->arch.apic->lapic_timer.timer;
        if (hrtimer_cancel(timer))
 -              hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 +              hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
  }
  
  /*
@@@ -2707,11 -2706,14 +2706,14 @@@ void kvm_apic_accept_events(struct kvm_
                return;
  
        /*
-        * INITs are latched while in SMM.  Because an SMM CPU cannot
-        * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
-        * and delay processing of INIT until the next RSM.
+        * INITs are latched while CPU is in specific states
+        * (SMM, VMX non-root mode, SVM with GIF=0).
+        * Because a CPU cannot be in these states immediately
+        * after it has processed an INIT signal (and thus in
+        * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
+        * and leave the INIT pending.
         */
-       if (is_smm(vcpu)) {
+       if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) {
                WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
                if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
                        clear_bit(KVM_APIC_SIPI, &apic->pending_events);
diff --combined arch/x86/kvm/mmu.c
index a63964e7cec7bdd67f2b1886adefe98f03972185,9086ee4b64cbf40d3d99eef91fb93e050ec9d159..a10af9c87f8ac1a2b42bf5a6f182840bd2fce276
@@@ -214,6 -214,7 +214,7 @@@ static u64 __read_mostly shadow_accesse
  static u64 __read_mostly shadow_dirty_mask;
  static u64 __read_mostly shadow_mmio_mask;
  static u64 __read_mostly shadow_mmio_value;
+ static u64 __read_mostly shadow_mmio_access_mask;
  static u64 __read_mostly shadow_present_mask;
  static u64 __read_mostly shadow_me_mask;
  
@@@ -299,14 -300,21 +300,21 @@@ static void kvm_flush_remote_tlbs_with_
        kvm_flush_remote_tlbs_with_range(kvm, &range);
  }
  
- void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
+ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask)
  {
+       BUG_ON((u64)(unsigned)access_mask != access_mask);
        BUG_ON((mmio_mask & mmio_value) != mmio_value);
        shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK;
        shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
+       shadow_mmio_access_mask = access_mask;
  }
  EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
  
+ static bool is_mmio_spte(u64 spte)
+ {
+       return (spte & shadow_mmio_mask) == shadow_mmio_value;
+ }
  static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
  {
        return sp->role.ad_disabled;
  
  static inline bool spte_ad_enabled(u64 spte)
  {
-       MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+       MMU_WARN_ON(is_mmio_spte(spte));
        return !(spte & shadow_acc_track_value);
  }
  
  static inline u64 spte_shadow_accessed_mask(u64 spte)
  {
-       MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+       MMU_WARN_ON(is_mmio_spte(spte));
        return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
  }
  
  static inline u64 spte_shadow_dirty_mask(u64 spte)
  {
-       MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+       MMU_WARN_ON(is_mmio_spte(spte));
        return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
  }
  
@@@ -389,7 -397,7 +397,7 @@@ static void mark_mmio_spte(struct kvm_v
        u64 mask = generation_mmio_spte_mask(gen);
        u64 gpa = gfn << PAGE_SHIFT;
  
-       access &= ACC_WRITE_MASK | ACC_USER_MASK;
+       access &= shadow_mmio_access_mask;
        mask |= shadow_mmio_value | access;
        mask |= gpa | shadow_nonpresent_or_rsvd_mask;
        mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
        mmu_spte_set(sptep, mask);
  }
  
- static bool is_mmio_spte(u64 spte)
- {
-       return (spte & shadow_mmio_mask) == shadow_mmio_value;
- }
  static gfn_t get_mmio_spte_gfn(u64 spte)
  {
        u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask;
  
  static unsigned get_mmio_spte_access(u64 spte)
  {
-       u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask;
-       return (spte & ~mask) & ~PAGE_MASK;
+       return spte & shadow_mmio_access_mask;
  }
  
  static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
@@@ -2095,12 -2097,6 +2097,12 @@@ static struct kvm_mmu_page *kvm_mmu_all
        if (!direct)
                sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
        set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 +
 +      /*
 +       * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
 +       * depends on valid pages being added to the head of the list.  See
 +       * comments in kvm_zap_obsolete_pages().
 +       */
        list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
        kvm_mod_used_mmu_pages(vcpu->kvm, +1);
        return sp;
@@@ -2250,7 -2246,7 +2252,7 @@@ static void kvm_mmu_commit_zap_page(str
  #define for_each_valid_sp(_kvm, _sp, _gfn)                            \
        hlist_for_each_entry(_sp,                                       \
          &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
 -              if ((_sp)->role.invalid) {    \
 +              if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) {    \
                } else
  
  #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)                       \
@@@ -2307,11 -2303,6 +2309,11 @@@ static void kvm_mmu_audit(struct kvm_vc
  static void mmu_audit_disable(void) { }
  #endif
  
 +static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 +{
 +      return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
 +}
 +
  static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                         struct list_head *invalid_list)
  {
@@@ -2536,7 -2527,6 +2538,7 @@@ static struct kvm_mmu_page *kvm_mmu_get
                if (level > PT_PAGE_TABLE_LEVEL && need_sync)
                        flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
        }
 +      sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
        clear_page(sp->spt);
        trace_kvm_mmu_get_page(sp, true);
  
@@@ -3302,7 -3292,8 +3304,8 @@@ static bool handle_abnormal_pfn(struct 
        }
  
        if (unlikely(is_noslot_pfn(pfn)))
-               vcpu_cache_mmio_info(vcpu, gva, gfn, access);
+               vcpu_cache_mmio_info(vcpu, gva, gfn,
+                                    access & shadow_mmio_access_mask);
  
        return false;
  }
@@@ -4245,13 -4236,6 +4248,13 @@@ static bool fast_cr3_switch(struct kvm_
                        return false;
  
                if (cached_root_available(vcpu, new_cr3, new_role)) {
 +                      /*
 +                       * It is possible that the cached previous root page is
 +                       * obsolete because of a change in the MMU generation
 +                       * number. However, changing the generation number is
 +                       * accompanied by KVM_REQ_MMU_RELOAD, which will free
 +                       * the root set here and allocate a new one.
 +                       */
                        kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
                        if (!skip_tlb_flush) {
                                kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@@ -5611,13 -5595,13 +5614,13 @@@ slot_handle_leaf(struct kvm *kvm, struc
                                 PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
  }
  
- static void free_mmu_pages(struct kvm_vcpu *vcpu)
+ static void free_mmu_pages(struct kvm_mmu *mmu)
  {
-       free_page((unsigned long)vcpu->arch.mmu->pae_root);
-       free_page((unsigned long)vcpu->arch.mmu->lm_root);
+       free_page((unsigned long)mmu->pae_root);
+       free_page((unsigned long)mmu->lm_root);
  }
  
- static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
+ static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
  {
        struct page *page;
        int i;
        if (!page)
                return -ENOMEM;
  
-       vcpu->arch.mmu->pae_root = page_address(page);
+       mmu->pae_root = page_address(page);
        for (i = 0; i < 4; ++i)
-               vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
+               mmu->pae_root[i] = INVALID_PAGE;
  
        return 0;
  }
  int kvm_mmu_create(struct kvm_vcpu *vcpu)
  {
        uint i;
+       int ret;
  
        vcpu->arch.mmu = &vcpu->arch.root_mmu;
        vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
                vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
  
        vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
-       return alloc_mmu_pages(vcpu);
+       ret = alloc_mmu_pages(vcpu, &vcpu->arch.guest_mmu);
+       if (ret)
+               return ret;
+       ret = alloc_mmu_pages(vcpu, &vcpu->arch.root_mmu);
+       if (ret)
+               goto fail_allocate_root;
+       return ret;
+  fail_allocate_root:
+       free_mmu_pages(&vcpu->arch.guest_mmu);
+       return ret;
  }
  
 +
 +static void kvm_zap_obsolete_pages(struct kvm *kvm)
 +{
 +      struct kvm_mmu_page *sp, *node;
 +      LIST_HEAD(invalid_list);
 +      int ign;
 +
 +restart:
 +      list_for_each_entry_safe_reverse(sp, node,
 +            &kvm->arch.active_mmu_pages, link) {
 +              /*
 +               * No obsolete valid page exists before a newly created page
 +               * since active_mmu_pages is a FIFO list.
 +               */
 +              if (!is_obsolete_sp(kvm, sp))
 +                      break;
 +
 +              /*
 +               * Do not repeatedly zap a root page to avoid unnecessary
 +               * KVM_REQ_MMU_RELOAD, otherwise we may not be able to
 +               * progress:
 +               *    vcpu 0                        vcpu 1
 +               *                         call vcpu_enter_guest():
 +               *                            1): handle KVM_REQ_MMU_RELOAD
 +               *                                and require mmu-lock to
 +               *                                load mmu
 +               * repeat:
 +               *    1): zap root page and
 +               *        send KVM_REQ_MMU_RELOAD
 +               *
 +               *    2): if (cond_resched_lock(mmu-lock))
 +               *
 +               *                            2): hold mmu-lock and load mmu
 +               *
 +               *                            3): see KVM_REQ_MMU_RELOAD bit
 +               *                                on vcpu->requests is set
 +               *                                then return 1 to call
 +               *                                vcpu_enter_guest() again.
 +               *            goto repeat;
 +               *
 +               * Since we are reversely walking the list and the invalid
 +               * list will be moved to the head, skip the invalid page
 +               * can help us to avoid the infinity list walking.
 +               */
 +              if (sp->role.invalid)
 +                      continue;
 +
 +              if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
 +                      kvm_mmu_commit_zap_page(kvm, &invalid_list);
 +                      cond_resched_lock(&kvm->mmu_lock);
 +                      goto restart;
 +              }
 +
 +              if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
 +                      goto restart;
 +      }
 +
 +      kvm_mmu_commit_zap_page(kvm, &invalid_list);
 +}
 +
 +/*
 + * Fast invalidate all shadow pages and use lock-break technique
 + * to zap obsolete pages.
 + *
 + * It's required when memslot is being deleted or VM is being
 + * destroyed, in these cases, we should ensure that KVM MMU does
 + * not use any resource of the being-deleted slot or all slots
 + * after calling the function.
 + */
 +static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 +{
 +      spin_lock(&kvm->mmu_lock);
 +      kvm->arch.mmu_valid_gen++;
 +
 +      kvm_zap_obsolete_pages(kvm);
 +      spin_unlock(&kvm->mmu_lock);
 +}
 +
  static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
                        struct kvm_memory_slot *slot,
                        struct kvm_page_track_notifier_node *node)
  {
 -      kvm_mmu_zap_all(kvm);
 +      kvm_mmu_zap_all_fast(kvm);
  }
  
  void kvm_mmu_init_vm(struct kvm *kvm)
@@@ -6094,7 -6013,7 +6110,7 @@@ static void kvm_set_mmio_spte_mask(void
        if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
                mask &= ~1ull;
  
-       kvm_mmu_set_mmio_spte_mask(mask, mask);
+       kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
  }
  
  int kvm_mmu_module_init(void)
@@@ -6168,7 -6087,8 +6184,8 @@@ unsigned long kvm_mmu_calculate_default
  void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
  {
        kvm_mmu_unload(vcpu);
-       free_mmu_pages(vcpu);
+       free_mmu_pages(&vcpu->arch.root_mmu);
+       free_mmu_pages(&vcpu->arch.guest_mmu);
        mmu_free_memory_caches(vcpu);
  }
  
diff --combined arch/x86/kvm/svm.c
index e0368076a1ef90660a460bf64f56bfa7b549fd2c,d24050b647c79c648ef10122b4b60e2b8614e88a..04fe21849b6e46adcc7dc4f7b32bea3541bf6cea
@@@ -68,10 -68,8 +68,8 @@@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id)
  #define SEG_TYPE_LDT 2
  #define SEG_TYPE_BUSY_TSS16 3
  
- #define SVM_FEATURE_NPT            (1 <<  0)
  #define SVM_FEATURE_LBRV           (1 <<  1)
  #define SVM_FEATURE_SVML           (1 <<  2)
- #define SVM_FEATURE_NRIP           (1 <<  3)
  #define SVM_FEATURE_TSC_RATE       (1 <<  4)
  #define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
  #define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
@@@ -770,7 -768,7 +768,7 @@@ static void svm_set_interrupt_shadow(st
  
  }
  
- static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
  
                svm->next_rip = svm->vmcb->control.next_rip;
        }
  
-       if (!svm->next_rip) {
-               if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
-                               EMULATE_DONE)
-                       printk(KERN_DEBUG "%s: NOP\n", __func__);
-               return;
-       }
+       if (!svm->next_rip)
+               return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
        if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
                printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
                       __func__, kvm_rip_read(vcpu), svm->next_rip);
  
        kvm_rip_write(vcpu, svm->next_rip);
        svm_set_interrupt_shadow(vcpu, 0);
+       return EMULATE_DONE;
  }
  
  static void svm_queue_exception(struct kvm_vcpu *vcpu)
                 * raises a fault that is not intercepted. Still better than
                 * failing in all cases.
                 */
-               skip_emulated_instruction(&svm->vcpu);
+               (void)skip_emulated_instruction(&svm->vcpu);
                rip = kvm_rip_read(&svm->vcpu);
                svm->int3_rip = rip + svm->vmcb->save.cs.base;
                svm->int3_injected = rip - old_rip;
@@@ -1269,11 -1266,11 +1266,11 @@@ static void grow_ple_window(struct kvm_
                                                        pause_filter_count_grow,
                                                        pause_filter_count_max);
  
-       if (control->pause_filter_count != old)
+       if (control->pause_filter_count != old) {
                mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-       trace_kvm_ple_window_grow(vcpu->vcpu_id,
-                                 control->pause_filter_count, old);
+               trace_kvm_ple_window_update(vcpu->vcpu_id,
+                                           control->pause_filter_count, old);
+       }
  }
  
  static void shrink_ple_window(struct kvm_vcpu *vcpu)
                                                    pause_filter_count,
                                                    pause_filter_count_shrink,
                                                    pause_filter_count);
-       if (control->pause_filter_count != old)
+       if (control->pause_filter_count != old) {
                mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-       trace_kvm_ple_window_shrink(vcpu->vcpu_id,
-                                   control->pause_filter_count, old);
+               trace_kvm_ple_window_update(vcpu->vcpu_id,
+                                           control->pause_filter_count, old);
+       }
  }
  
  static __init int svm_hardware_setup(void)
@@@ -2136,6 -2133,9 +2133,9 @@@ static struct kvm_vcpu *svm_create_vcpu
        struct page *nested_msrpm_pages;
        int err;
  
+       BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0,
+               "struct kvm_vcpu must be at offset 0 for arch usercopy region");
        svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
        if (!svm) {
                err = -ENOMEM;
@@@ -2903,13 -2903,11 +2903,11 @@@ static int nop_on_interception(struct v
  
  static int halt_interception(struct vcpu_svm *svm)
  {
-       svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
        return kvm_emulate_halt(&svm->vcpu);
  }
  
  static int vmmcall_interception(struct vcpu_svm *svm)
  {
-       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        return kvm_emulate_hypercall(&svm->vcpu);
  }
  
@@@ -3588,9 -3586,9 +3586,9 @@@ static void enter_svm_guest_mode(struc
        mark_all_dirty(svm->vmcb);
  }
  
- static bool nested_svm_vmrun(struct vcpu_svm *svm)
+ static int nested_svm_vmrun(struct vcpu_svm *svm)
  {
-       int rc;
+       int ret;
        struct vmcb *nested_vmcb;
        struct vmcb *hsave = svm->nested.hsave;
        struct vmcb *vmcb = svm->vmcb;
  
        vmcb_gpa = svm->vmcb->save.rax;
  
-       rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
-       if (rc) {
-               if (rc == -EINVAL)
-                       kvm_inject_gp(&svm->vcpu, 0);
-               return false;
+       ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
+       if (ret == -EINVAL) {
+               kvm_inject_gp(&svm->vcpu, 0);
+               return 1;
+       } else if (ret) {
+               return kvm_skip_emulated_instruction(&svm->vcpu);
        }
  
+       ret = kvm_skip_emulated_instruction(&svm->vcpu);
        nested_vmcb = map.hva;
  
        if (!nested_vmcb_checks(nested_vmcb)) {
  
                kvm_vcpu_unmap(&svm->vcpu, &map, true);
  
-               return false;
+               return ret;
        }
  
        trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
  
        enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
  
-       return true;
+       if (!nested_svm_vmrun_msrpm(svm)) {
+               svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
+               svm->vmcb->control.exit_code_hi = 0;
+               svm->vmcb->control.exit_info_1  = 0;
+               svm->vmcb->control.exit_info_2  = 0;
+               nested_svm_vmexit(svm);
+       }
+       return ret;
  }
  
  static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
@@@ -3697,7 -3707,6 +3707,6 @@@ static int vmload_interception(struct v
  
        nested_vmcb = map.hva;
  
-       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        ret = kvm_skip_emulated_instruction(&svm->vcpu);
  
        nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
@@@ -3724,7 -3733,6 +3733,6 @@@ static int vmsave_interception(struct v
  
        nested_vmcb = map.hva;
  
-       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        ret = kvm_skip_emulated_instruction(&svm->vcpu);
  
        nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
@@@ -3738,27 -3746,7 +3746,7 @@@ static int vmrun_interception(struct vc
        if (nested_svm_check_permissions(svm))
                return 1;
  
-       /* Save rip after vmrun instruction */
-       kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
-       if (!nested_svm_vmrun(svm))
-               return 1;
-       if (!nested_svm_vmrun_msrpm(svm))
-               goto failed;
-       return 1;
- failed:
-       svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
-       svm->vmcb->control.exit_code_hi = 0;
-       svm->vmcb->control.exit_info_1  = 0;
-       svm->vmcb->control.exit_info_2  = 0;
-       nested_svm_vmexit(svm);
-       return 1;
+       return nested_svm_vmrun(svm);
  }
  
  static int stgi_interception(struct vcpu_svm *svm)
        if (vgif_enabled(svm))
                clr_intercept(svm, INTERCEPT_STGI);
  
-       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        ret = kvm_skip_emulated_instruction(&svm->vcpu);
        kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
  
@@@ -3791,7 -3778,6 +3778,6 @@@ static int clgi_interception(struct vcp
        if (nested_svm_check_permissions(svm))
                return 1;
  
-       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        ret = kvm_skip_emulated_instruction(&svm->vcpu);
  
        disable_gif(svm);
@@@ -3816,7 -3802,6 +3802,6 @@@ static int invlpga_interception(struct 
        /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
        kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
  
-       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        return kvm_skip_emulated_instruction(&svm->vcpu);
  }
  
@@@ -3839,7 -3824,6 +3824,6 @@@ static int xsetbv_interception(struct v
        u32 index = kvm_rcx_read(&svm->vcpu);
  
        if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
-               svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
                return kvm_skip_emulated_instruction(&svm->vcpu);
        }
  
@@@ -3898,25 -3882,29 +3882,29 @@@ static int task_switch_interception(str
        if (reason != TASK_SWITCH_GATE ||
            int_type == SVM_EXITINTINFO_TYPE_SOFT ||
            (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
-            (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
-               skip_emulated_instruction(&svm->vcpu);
+            (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
+               if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
+                       goto fail;
+       }
  
        if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
                int_vec = -1;
  
        if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
-                               has_error_code, error_code) == EMULATE_FAIL) {
-               svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-               svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-               svm->vcpu.run->internal.ndata = 0;
-               return 0;
-       }
+                               has_error_code, error_code) == EMULATE_FAIL)
+               goto fail;
        return 1;
+ fail:
+       svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+       svm->vcpu.run->internal.ndata = 0;
+       return 0;
  }
  
  static int cpuid_interception(struct vcpu_svm *svm)
  {
-       svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
        return kvm_emulate_cpuid(&svm->vcpu);
  }
  
@@@ -4232,23 -4220,7 +4220,7 @@@ static int svm_get_msr(struct kvm_vcpu 
  
  static int rdmsr_interception(struct vcpu_svm *svm)
  {
-       u32 ecx = kvm_rcx_read(&svm->vcpu);
-       struct msr_data msr_info;
-       msr_info.index = ecx;
-       msr_info.host_initiated = false;
-       if (svm_get_msr(&svm->vcpu, &msr_info)) {
-               trace_kvm_msr_read_ex(ecx);
-               kvm_inject_gp(&svm->vcpu, 0);
-               return 1;
-       } else {
-               trace_kvm_msr_read(ecx, msr_info.data);
-               kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff);
-               kvm_rdx_write(&svm->vcpu, msr_info.data >> 32);
-               svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-               return kvm_skip_emulated_instruction(&svm->vcpu);
-       }
+       return kvm_emulate_rdmsr(&svm->vcpu);
  }
  
  static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@@ -4438,23 -4410,7 +4410,7 @@@ static int svm_set_msr(struct kvm_vcpu 
  
  static int wrmsr_interception(struct vcpu_svm *svm)
  {
-       struct msr_data msr;
-       u32 ecx = kvm_rcx_read(&svm->vcpu);
-       u64 data = kvm_read_edx_eax(&svm->vcpu);
-       msr.data = data;
-       msr.index = ecx;
-       msr.host_initiated = false;
-       svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-       if (kvm_set_msr(&svm->vcpu, &msr)) {
-               trace_kvm_msr_write_ex(ecx, data);
-               kvm_inject_gp(&svm->vcpu, 0);
-               return 1;
-       } else {
-               trace_kvm_msr_write(ecx, data);
-               return kvm_skip_emulated_instruction(&svm->vcpu);
-       }
+       return kvm_emulate_wrmsr(&svm->vcpu);
  }
  
  static int msr_interception(struct vcpu_svm *svm)
@@@ -5025,9 -4981,14 +4981,14 @@@ static int handle_exit(struct kvm_vcpu 
  
        if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
            || !svm_exit_handlers[exit_code]) {
-               WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
-               kvm_queue_exception(vcpu, UD_VECTOR);
-               return 1;
+               vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
+               dump_vmcb(vcpu);
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror =
+                       KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+               vcpu->run->internal.ndata = 1;
+               vcpu->run->internal.data[0] = exit_code;
+               return 0;
        }
  
        return svm_exit_handlers[exit_code](svm);
@@@ -5274,7 -5235,8 +5235,8 @@@ get_pi_vcpu_info(struct kvm *kvm, struc
  
        kvm_set_msi_irq(kvm, e, &irq);
  
-       if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+       if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+           !kvm_irq_is_postable(&irq)) {
                pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
                         __func__, irq.vector);
                return -1;
@@@ -5328,6 -5290,7 +5290,7 @@@ static int svm_update_pi_irte(struct kv
                 * 1. When cannot target interrupt to a specific vcpu.
                 * 2. Unsetting posted interrupt.
                 * 3. APIC virtialization is disabled for the vcpu.
+                * 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
                 */
                if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
                    kvm_vcpu_apicv_active(&svm->vcpu)) {
@@@ -5933,6 -5896,8 +5896,8 @@@ static void svm_cpuid_update(struct kvm
        guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
  }
  
+ #define F(x) bit(X86_FEATURE_##x)
  static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
  {
        switch (func) {
                if (nested)
                        entry->ecx |= (1 << 2); /* Set SVM bit */
                break;
+       case 0x80000008:
+               if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
+                    boot_cpu_has(X86_FEATURE_AMD_SSBD))
+                       entry->ebx |= F(VIRT_SSBD);
+               break;
        case 0x8000000A:
                entry->eax = 1; /* SVM revision 1 */
                entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
  
                /* Support next_rip if host supports it */
                if (boot_cpu_has(X86_FEATURE_NRIPS))
-                       entry->edx |= SVM_FEATURE_NRIP;
+                       entry->edx |= F(NRIPS);
  
                /* Support NPT for the guest if enabled */
                if (npt_enabled)
-                       entry->edx |= SVM_FEATURE_NPT;
+                       entry->edx |= F(NPT);
  
                break;
        case 0x8000001F:
@@@ -6067,6 -6037,7 +6037,7 @@@ static const struct __x86_intercept 
        [x86_intercept_ins]             = POST_EX(SVM_EXIT_IOIO),
        [x86_intercept_out]             = POST_EX(SVM_EXIT_IOIO),
        [x86_intercept_outs]            = POST_EX(SVM_EXIT_IOIO),
+       [x86_intercept_xsetbv]          = PRE_EX(SVM_EXIT_XSETBV),
  };
  
  #undef PRE_EX
@@@ -7128,6 -7099,12 +7099,6 @@@ failed
        return ret;
  }
  
 -static uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
 -{
 -      /* Not supported */
 -      return 0;
 -}
 -
  static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
                                   uint16_t *vmcs_version)
  {
@@@ -7193,6 -7170,21 +7164,21 @@@ static bool svm_need_emulation_on_page_
        return false;
  }
  
+ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+       /*
+        * TODO: Last condition latch INIT signals on vCPU when
+        * vCPU is in guest-mode and vmcb12 defines intercept on INIT.
+        * To properly emulate the INIT intercept, SVM should implement
+        * kvm_x86_ops->check_nested_events() and call nested_svm_vmexit()
+        * there if an INIT signal is pending.
+        */
+       return !gif_set(svm) ||
+                  (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
+ }
  static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .cpu_has_kvm_support = has_svm,
        .disabled_by_bios = is_disabled,
        .mem_enc_unreg_region = svm_unregister_enc_region,
  
        .nested_enable_evmcs = nested_enable_evmcs,
 -      .nested_get_evmcs_version = nested_get_evmcs_version,
 +      .nested_get_evmcs_version = NULL,
  
        .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
+       .apic_init_signal_blocked = svm_apic_init_signal_blocked,
  };
  
  static int __init svm_init(void)
diff --combined arch/x86/kvm/vmx/nested.c
index a3cba321b5c5ddb7b2730835c01635c0307e1bb1,6ce83c602e7fe878f567455dcc4e03e6b9ec77f9..1a10cd3519402d7c82dfe137e30d5e86c78aa66f
@@@ -19,6 -19,14 +19,14 @@@ module_param_named(enable_shadow_vmcs, 
  static bool __read_mostly nested_early_check = 0;
  module_param(nested_early_check, bool, S_IRUGO);
  
+ #define CC(consistency_check)                                         \
+ ({                                                                    \
+       bool failed = (consistency_check);                              \
+       if (failed)                                                     \
+               trace_kvm_nested_vmenter_failed(#consistency_check, 0); \
+       failed;                                                         \
+ })
  /*
   * Hyper-V requires all of these, so mark them as supported even though
   * they are just treated the same as all-context.
@@@ -430,8 -438,8 +438,8 @@@ static int nested_vmx_check_io_bitmap_c
        if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
                return 0;
  
-       if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
-           !page_address_valid(vcpu, vmcs12->io_bitmap_b))
+       if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) ||
+           CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
                return -EINVAL;
  
        return 0;
@@@ -443,7 -451,7 +451,7 @@@ static int nested_vmx_check_msr_bitmap_
        if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
                return 0;
  
-       if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
+       if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
                return -EINVAL;
  
        return 0;
@@@ -455,7 -463,7 +463,7 @@@ static int nested_vmx_check_tpr_shadow_
        if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
                return 0;
  
-       if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))
+       if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
                return -EINVAL;
  
        return 0;
@@@ -688,7 -696,7 +696,7 @@@ static int nested_vmx_check_apic_access
                                          struct vmcs12 *vmcs12)
  {
        if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
-           !page_address_valid(vcpu, vmcs12->apic_access_addr))
+           CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
                return -EINVAL;
        else
                return 0;
@@@ -707,16 -715,15 +715,15 @@@ static int nested_vmx_check_apicv_contr
         * If virtualize x2apic mode is enabled,
         * virtualize apic access must be disabled.
         */
-       if (nested_cpu_has_virt_x2apic_mode(vmcs12) &&
-           nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+       if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
+              nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
                return -EINVAL;
  
        /*
         * If virtual interrupt delivery is enabled,
         * we must exit on external interrupts.
         */
-       if (nested_cpu_has_vid(vmcs12) &&
-          !nested_exit_on_intr(vcpu))
+       if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
                return -EINVAL;
  
        /*
         * bits 5:0 of posted_intr_desc_addr should be zero.
         */
        if (nested_cpu_has_posted_intr(vmcs12) &&
-          (!nested_cpu_has_vid(vmcs12) ||
-           !nested_exit_intr_ack_set(vcpu) ||
-           (vmcs12->posted_intr_nv & 0xff00) ||
-           (vmcs12->posted_intr_desc_addr & 0x3f) ||
-           (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu))))
+          (CC(!nested_cpu_has_vid(vmcs12)) ||
+           CC(!nested_exit_intr_ack_set(vcpu)) ||
+           CC((vmcs12->posted_intr_nv & 0xff00)) ||
+           CC((vmcs12->posted_intr_desc_addr & 0x3f)) ||
+           CC((vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))))
                return -EINVAL;
  
        /* tpr shadow is needed by all apicv features. */
-       if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
+       if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
                return -EINVAL;
  
        return 0;
@@@ -759,10 -766,12 +766,12 @@@ static int nested_vmx_check_msr_switch(
  static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
                                                     struct vmcs12 *vmcs12)
  {
-       if (nested_vmx_check_msr_switch(vcpu, vmcs12->vm_exit_msr_load_count,
-                                       vmcs12->vm_exit_msr_load_addr) ||
-           nested_vmx_check_msr_switch(vcpu, vmcs12->vm_exit_msr_store_count,
-                                       vmcs12->vm_exit_msr_store_addr))
+       if (CC(nested_vmx_check_msr_switch(vcpu,
+                                          vmcs12->vm_exit_msr_load_count,
+                                          vmcs12->vm_exit_msr_load_addr)) ||
+           CC(nested_vmx_check_msr_switch(vcpu,
+                                          vmcs12->vm_exit_msr_store_count,
+                                          vmcs12->vm_exit_msr_store_addr)))
                return -EINVAL;
  
        return 0;
  static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
                                                        struct vmcs12 *vmcs12)
  {
-       if (nested_vmx_check_msr_switch(vcpu, vmcs12->vm_entry_msr_load_count,
-                                         vmcs12->vm_entry_msr_load_addr))
+       if (CC(nested_vmx_check_msr_switch(vcpu,
+                                          vmcs12->vm_entry_msr_load_count,
+                                          vmcs12->vm_entry_msr_load_addr)))
                  return -EINVAL;
  
        return 0;
@@@ -784,8 -794,8 +794,8 @@@ static int nested_vmx_check_pml_control
        if (!nested_cpu_has_pml(vmcs12))
                return 0;
  
-       if (!nested_cpu_has_ept(vmcs12) ||
-           !page_address_valid(vcpu, vmcs12->pml_address))
+       if (CC(!nested_cpu_has_ept(vmcs12)) ||
+           CC(!page_address_valid(vcpu, vmcs12->pml_address)))
                return -EINVAL;
  
        return 0;
  static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
                                                        struct vmcs12 *vmcs12)
  {
-       if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
-           !nested_cpu_has_ept(vmcs12))
+       if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
+              !nested_cpu_has_ept(vmcs12)))
                return -EINVAL;
        return 0;
  }
  static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
                                                         struct vmcs12 *vmcs12)
  {
-       if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
-           !nested_cpu_has_ept(vmcs12))
+       if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
+              !nested_cpu_has_ept(vmcs12)))
                return -EINVAL;
        return 0;
  }
@@@ -815,8 -825,8 +825,8 @@@ static int nested_vmx_check_shadow_vmcs
        if (!nested_cpu_has_shadow_vmcs(vmcs12))
                return 0;
  
-       if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) ||
-           !page_address_valid(vcpu, vmcs12->vmwrite_bitmap))
+       if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) ||
+           CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
                return -EINVAL;
  
        return 0;
@@@ -826,12 -836,12 +836,12 @@@ static int nested_vmx_msr_check_common(
                                       struct vmx_msr_entry *e)
  {
        /* x2APIC MSR accesses are not allowed */
-       if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
+       if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
                return -EINVAL;
-       if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
-           e->index == MSR_IA32_UCODE_REV)
+       if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */
+           CC(e->index == MSR_IA32_UCODE_REV))
                return -EINVAL;
-       if (e->reserved != 0)
+       if (CC(e->reserved != 0))
                return -EINVAL;
        return 0;
  }
  static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
                                     struct vmx_msr_entry *e)
  {
-       if (e->index == MSR_FS_BASE ||
-           e->index == MSR_GS_BASE ||
-           e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */
+       if (CC(e->index == MSR_FS_BASE) ||
+           CC(e->index == MSR_GS_BASE) ||
+           CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */
            nested_vmx_msr_check_common(vcpu, e))
                return -EINVAL;
        return 0;
  static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
                                      struct vmx_msr_entry *e)
  {
-       if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */
+       if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */
            nested_vmx_msr_check_common(vcpu, e))
                return -EINVAL;
        return 0;
@@@ -864,9 -874,7 +874,7 @@@ static u32 nested_vmx_load_msr(struct k
  {
        u32 i;
        struct vmx_msr_entry e;
-       struct msr_data msr;
  
-       msr.host_initiated = false;
        for (i = 0; i < count; i++) {
                if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
                                        &e, sizeof(e))) {
                                __func__, i, e.index, e.reserved);
                        goto fail;
                }
-               msr.index = e.index;
-               msr.data = e.value;
-               if (kvm_set_msr(vcpu, &msr)) {
+               if (kvm_set_msr(vcpu, e.index, e.value)) {
                        pr_debug_ratelimited(
                                "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
                                __func__, i, e.index, e.value);
@@@ -897,11 -903,11 +903,11 @@@ fail
  
  static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
  {
+       u64 data;
        u32 i;
        struct vmx_msr_entry e;
  
        for (i = 0; i < count; i++) {
-               struct msr_data msr_info;
                if (kvm_vcpu_read_guest(vcpu,
                                        gpa + i * sizeof(e),
                                        &e, 2 * sizeof(u32))) {
                                __func__, i, e.index, e.reserved);
                        return -EINVAL;
                }
-               msr_info.host_initiated = false;
-               msr_info.index = e.index;
-               if (kvm_get_msr(vcpu, &msr_info)) {
+               if (kvm_get_msr(vcpu, e.index, &data)) {
                        pr_debug_ratelimited(
                                "%s cannot read MSR (%u, 0x%x)\n",
                                __func__, i, e.index);
                if (kvm_vcpu_write_guest(vcpu,
                                         gpa + i * sizeof(e) +
                                             offsetof(struct vmx_msr_entry, value),
-                                        &msr_info.data, sizeof(msr_info.data))) {
+                                        &data, sizeof(data))) {
                        pr_debug_ratelimited(
                                "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
-                               __func__, i, e.index, msr_info.data);
+                               __func__, i, e.index, data);
                        return -EINVAL;
                }
        }
@@@ -955,7 -959,7 +959,7 @@@ static int nested_vmx_load_cr3(struct k
                               u32 *entry_failure_code)
  {
        if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
-               if (!nested_cr3_valid(vcpu, cr3)) {
+               if (CC(!nested_cr3_valid(vcpu, cr3))) {
                        *entry_failure_code = ENTRY_FAIL_DEFAULT;
                        return -EINVAL;
                }
                 * must not be dereferenced.
                 */
                if (is_pae_paging(vcpu) && !nested_ept) {
-                       if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
+                       if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
                                *entry_failure_code = ENTRY_FAIL_PDPTE;
                                return -EINVAL;
                        }
@@@ -2411,12 -2415,12 +2415,12 @@@ static int prepare_vmcs02(struct kvm_vc
  
  static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
  {
-       if (!nested_cpu_has_nmi_exiting(vmcs12) &&
-           nested_cpu_has_virtual_nmis(vmcs12))
+       if (CC(!nested_cpu_has_nmi_exiting(vmcs12) &&
+              nested_cpu_has_virtual_nmis(vmcs12)))
                return -EINVAL;
  
-       if (!nested_cpu_has_virtual_nmis(vmcs12) &&
-           nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))
+       if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
+              nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)))
                return -EINVAL;
  
        return 0;
@@@ -2430,11 -2434,11 +2434,11 @@@ static bool valid_ept_address(struct kv
        /* Check for memory type validity */
        switch (address & VMX_EPTP_MT_MASK) {
        case VMX_EPTP_MT_UC:
-               if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))
+               if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
                        return false;
                break;
        case VMX_EPTP_MT_WB:
-               if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))
+               if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)))
                        return false;
                break;
        default:
        }
  
        /* only 4 levels page-walk length are valid */
-       if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)
+       if (CC((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4))
                return false;
  
        /* Reserved bits should not be set */
-       if (address >> maxphyaddr || ((address >> 7) & 0x1f))
+       if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f)))
                return false;
  
        /* AD, if set, should be supported */
        if (address & VMX_EPTP_AD_ENABLE_BIT) {
-               if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))
+               if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
                        return false;
        }
  
@@@ -2466,21 -2470,21 +2470,21 @@@ static int nested_check_vm_execution_co
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
-       if (!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
-                               vmx->nested.msrs.pinbased_ctls_low,
-                               vmx->nested.msrs.pinbased_ctls_high) ||
-           !vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
-                               vmx->nested.msrs.procbased_ctls_low,
-                               vmx->nested.msrs.procbased_ctls_high))
+       if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
+                                  vmx->nested.msrs.pinbased_ctls_low,
+                                  vmx->nested.msrs.pinbased_ctls_high)) ||
+           CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
+                                  vmx->nested.msrs.procbased_ctls_low,
+                                  vmx->nested.msrs.procbased_ctls_high)))
                return -EINVAL;
  
        if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
-           !vmx_control_verify(vmcs12->secondary_vm_exec_control,
-                                vmx->nested.msrs.secondary_ctls_low,
-                                vmx->nested.msrs.secondary_ctls_high))
+           CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control,
+                                  vmx->nested.msrs.secondary_ctls_low,
+                                  vmx->nested.msrs.secondary_ctls_high)))
                return -EINVAL;
  
-       if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu) ||
+       if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ||
            nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
            nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
            nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
            nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
            nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
            nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
-           (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
+           CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
                return -EINVAL;
  
        if (!nested_cpu_has_preemption_timer(vmcs12) &&
                return -EINVAL;
  
        if (nested_cpu_has_ept(vmcs12) &&
-           !valid_ept_address(vcpu, vmcs12->ept_pointer))
+           CC(!valid_ept_address(vcpu, vmcs12->ept_pointer)))
                return -EINVAL;
  
        if (nested_cpu_has_vmfunc(vmcs12)) {
-               if (vmcs12->vm_function_control &
-                   ~vmx->nested.msrs.vmfunc_controls)
+               if (CC(vmcs12->vm_function_control &
+                      ~vmx->nested.msrs.vmfunc_controls))
                        return -EINVAL;
  
                if (nested_cpu_has_eptp_switching(vmcs12)) {
-                       if (!nested_cpu_has_ept(vmcs12) ||
-                           !page_address_valid(vcpu, vmcs12->eptp_list_address))
+                       if (CC(!nested_cpu_has_ept(vmcs12)) ||
+                           CC(!page_address_valid(vcpu, vmcs12->eptp_list_address)))
                                return -EINVAL;
                }
        }
@@@ -2525,10 -2529,10 +2529,10 @@@ static int nested_check_vm_exit_control
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
-       if (!vmx_control_verify(vmcs12->vm_exit_controls,
-                               vmx->nested.msrs.exit_ctls_low,
-                               vmx->nested.msrs.exit_ctls_high) ||
-           nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12))
+       if (CC(!vmx_control_verify(vmcs12->vm_exit_controls,
+                                   vmx->nested.msrs.exit_ctls_low,
+                                   vmx->nested.msrs.exit_ctls_high)) ||
+           CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)))
                return -EINVAL;
  
        return 0;
@@@ -2542,9 -2546,9 +2546,9 @@@ static int nested_check_vm_entry_contro
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
-       if (!vmx_control_verify(vmcs12->vm_entry_controls,
-                               vmx->nested.msrs.entry_ctls_low,
-                               vmx->nested.msrs.entry_ctls_high))
+       if (CC(!vmx_control_verify(vmcs12->vm_entry_controls,
+                                   vmx->nested.msrs.entry_ctls_low,
+                                   vmx->nested.msrs.entry_ctls_high)))
                return -EINVAL;
  
        /*
                bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
  
                /* VM-entry interruption-info field: interruption type */
-               if (intr_type == INTR_TYPE_RESERVED ||
-                   (intr_type == INTR_TYPE_OTHER_EVENT &&
-                    !nested_cpu_supports_monitor_trap_flag(vcpu)))
+               if (CC(intr_type == INTR_TYPE_RESERVED) ||
+                   CC(intr_type == INTR_TYPE_OTHER_EVENT &&
+                      !nested_cpu_supports_monitor_trap_flag(vcpu)))
                        return -EINVAL;
  
                /* VM-entry interruption-info field: vector */
-               if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
-                   (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
-                   (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
+               if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
+                   CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
+                   CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
                        return -EINVAL;
  
                /* VM-entry interruption-info field: deliver error code */
                should_have_error_code =
                        intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
                        x86_exception_has_error_code(vector);
-               if (has_error_code != should_have_error_code)
+               if (CC(has_error_code != should_have_error_code))
                        return -EINVAL;
  
                /* VM-entry exception error code */
-               if (has_error_code &&
-                   vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
+               if (CC(has_error_code &&
+                      vmcs12->vm_entry_exception_error_code & GENMASK(31, 15)))
                        return -EINVAL;
  
                /* VM-entry interruption-info field: reserved bits */
-               if (intr_info & INTR_INFO_RESVD_BITS_MASK)
+               if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK))
                        return -EINVAL;
  
                /* VM-entry instruction length */
                case INTR_TYPE_SOFT_EXCEPTION:
                case INTR_TYPE_SOFT_INTR:
                case INTR_TYPE_PRIV_SW_EXCEPTION:
-                       if ((vmcs12->vm_entry_instruction_len > 15) ||
-                           (vmcs12->vm_entry_instruction_len == 0 &&
-                            !nested_cpu_has_zero_length_injection(vcpu)))
+                       if (CC(vmcs12->vm_entry_instruction_len > 15) ||
+                           CC(vmcs12->vm_entry_instruction_len == 0 &&
+                              !nested_cpu_has_zero_length_injection(vcpu)))
                                return -EINVAL;
                }
        }
@@@ -2625,40 -2629,40 +2629,40 @@@ static int nested_vmx_check_host_state(
  {
        bool ia32e;
  
-       if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
-           !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
-           !nested_cr3_valid(vcpu, vmcs12->host_cr3))
+       if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
+           CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
+           CC(!nested_cr3_valid(vcpu, vmcs12->host_cr3)))
                return -EINVAL;
  
-       if (is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu) ||
-           is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))
+       if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
+           CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
                return -EINVAL;
  
        if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
-           !kvm_pat_valid(vmcs12->host_ia32_pat))
+           CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
                return -EINVAL;
  
        ia32e = (vmcs12->vm_exit_controls &
                 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
  
-       if (vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
-           vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
-           vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
-           vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
-           vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
-           vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
-           vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
-           vmcs12->host_cs_selector == 0 ||
-           vmcs12->host_tr_selector == 0 ||
-           (vmcs12->host_ss_selector == 0 && !ia32e))
+       if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
+           CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
+           CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
+           CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
+           CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
+           CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
+           CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
+           CC(vmcs12->host_cs_selector == 0) ||
+           CC(vmcs12->host_tr_selector == 0) ||
+           CC(vmcs12->host_ss_selector == 0 && !ia32e))
                return -EINVAL;
  
  #ifdef CONFIG_X86_64
-       if (is_noncanonical_address(vmcs12->host_fs_base, vcpu) ||
-           is_noncanonical_address(vmcs12->host_gs_base, vcpu) ||
-           is_noncanonical_address(vmcs12->host_gdtr_base, vcpu) ||
-           is_noncanonical_address(vmcs12->host_idtr_base, vcpu) ||
-           is_noncanonical_address(vmcs12->host_tr_base, vcpu))
+       if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
+           CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
+           CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
+           CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
+           CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)))
                return -EINVAL;
  #endif
  
         * the host address-space size VM-exit control.
         */
        if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
-               if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
-                   ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
-                   ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
+               if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) ||
+                   CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) ||
+                   CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)))
                        return -EINVAL;
        }
  
@@@ -2688,16 -2692,16 +2692,16 @@@ static int nested_vmx_check_vmcs_link_p
        if (vmcs12->vmcs_link_pointer == -1ull)
                return 0;
  
-       if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
+       if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
                return -EINVAL;
  
-       if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
+       if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
                return -EINVAL;
  
        shadow = map.hva;
  
-       if (shadow->hdr.revision_id != VMCS12_REVISION ||
-           shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
+       if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
+           CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
                r = -EINVAL;
  
        kvm_vcpu_unmap(vcpu, &map, false);
   */
  static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
  {
-       if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
-           vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
+       if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
+              vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT))
                return -EINVAL;
  
        return 0;
@@@ -2724,12 -2728,12 +2728,12 @@@ static int nested_vmx_check_guest_state
  
        *exit_qual = ENTRY_FAIL_DEFAULT;
  
-       if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
-           !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
+       if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
+           CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
                return -EINVAL;
  
        if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
-           !kvm_pat_valid(vmcs12->guest_ia32_pat))
+           CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
                return -EINVAL;
  
        if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
        if (to_vmx(vcpu)->nested.nested_run_pending &&
            (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
                ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
-               if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
-                   ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
-                   ((vmcs12->guest_cr0 & X86_CR0_PG) &&
-                    ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))
+               if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
+                   CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
+                   CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
+                    ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))))
                        return -EINVAL;
        }
  
        if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
-           (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
-            (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
+           (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
+            CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
                return -EINVAL;
  
        if (nested_check_guest_non_reg_state(vmcs12))
@@@ -2841,9 -2845,13 +2845,13 @@@ static int nested_vmx_check_vmentry_hw(
                vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
  
        if (vm_fail) {
+               u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);
                preempt_enable();
-               WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
-                            VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+               trace_kvm_nested_vmenter_failed(
+                       "early hardware check VM-instruction error: ", error);
+               WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                return 1;
        }
  
@@@ -3401,6 -3409,15 +3409,15 @@@ static int vmx_check_nested_events(stru
        unsigned long exit_qual;
        bool block_nested_events =
            vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       if (lapic_in_kernel(vcpu) &&
+               test_bit(KVM_APIC_INIT, &apic->pending_events)) {
+               if (block_nested_events)
+                       return -EBUSY;
+               nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
+               return 0;
+       }
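
EXIT_REASON_INIT_SIGNAL used above is the architectural basic exit reason for an INIT signal (3 in the SDM's basic exit reason encoding); presumably it is added next to the other EXIT_REASON_* values in the VMX UAPI header, along the lines of:

#define EXIT_REASON_INIT_SIGNAL		3
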
  
        if (vcpu->arch.exception.pending &&
                nested_vmx_check_exception(vcpu, &exit_qual)) {
@@@ -3889,7 -3906,6 +3906,6 @@@ static void nested_vmx_restore_host_sta
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmx_msr_entry g, h;
-       struct msr_data msr;
        gpa_t gpa;
        u32 i, j;
  
         * from the guest value.  The intent is to stuff host state as
         * silently as possible, not to fully process the exit load list.
         */
-       msr.host_initiated = false;
        for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
                gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
                if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
                                goto vmabort;
                        }
  
-                       msr.index = h.index;
-                       msr.data = h.value;
-                       if (kvm_set_msr(vcpu, &msr)) {
+                       if (kvm_set_msr(vcpu, h.index, h.value)) {
                                pr_debug_ratelimited(
                                        "%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
                                        __func__, j, h.index, h.value);
@@@ -4466,7 -4479,12 +4479,12 @@@ static int handle_vmoff(struct kvm_vcp
  {
        if (!nested_vmx_check_permission(vcpu))
                return 1;
        free_nested(vcpu);
+       /* Process an INIT signal latched while the CPU was in VMX operation */
+       kvm_make_request(KVM_REQ_EVENT, vcpu);
        return nested_vmx_succeed(vcpu);
  }
  
@@@ -4540,7 -4558,6 +4558,7 @@@ static int handle_vmread(struct kvm_vcp
        int len;
        gva_t gva = 0;
        struct vmcs12 *vmcs12;
 +      struct x86_exception e;
        short offset;
  
        if (!nested_vmx_check_permission(vcpu))
                                vmx_instruction_info, true, len, &gva))
                        return 1;
                /* _system ok, nested_vmx_check_permission has verified cpl=0 */
 -              kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL);
 +              if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e))
 +                      kvm_inject_page_fault(vcpu, &e);
        }
  
        return nested_vmx_succeed(vcpu);
@@@ -5261,8 -5277,9 +5279,9 @@@ bool nested_vmx_exit_reflected(struct k
                return false;
  
        if (unlikely(vmx->fail)) {
-               pr_info_ratelimited("%s failed vm entry %x\n", __func__,
-                                   vmcs_read32(VM_INSTRUCTION_ERROR));
+               trace_kvm_nested_vmenter_failed(
+                       "hardware VM-instruction error: ",
+                       vmcs_read32(VM_INSTRUCTION_ERROR));
                return true;
        }
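
Both failure paths above feed the new kvm_nested_vmenter_failed tracepoint with a message string plus an optional VM-instruction error code. A simplified sketch of the event's shape, assuming the usual TRACE_EVENT boilerplate in arch/x86/kvm/trace.h (the in-tree declaration additionally decodes the error code symbolically):

TRACE_EVENT(kvm_nested_vmenter_failed,
	TP_PROTO(const char *msg, u32 err),
	TP_ARGS(msg, err),

	TP_STRUCT__entry(
		__field(const char *, msg)
		__field(u32, err)
	),

	TP_fast_assign(
		__entry->msg = msg;
		__entry->err = err;
	),

	TP_printk("%s0x%x", __entry->msg, __entry->err)
);
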
  
diff --combined arch/x86/kvm/vmx/vmx.c
index c030c96fc81a817f6e11e3b1580aa907b8bc63f7,73bf9a2e6fb6aa044651a4528b8b129158537bd3..4a99be1fae4e58193116d5e08a98ef42a4debd2b
@@@ -1472,8 -1472,11 +1472,11 @@@ static int vmx_rtit_ctl_check(struct kv
        return 0;
  }
  
- static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+ /*
+  * Returns an int to be compatible with the SVM implementation (which can fail).
+  * Do not use directly; use skip_emulated_instruction() instead.
+  */
+ static int __skip_emulated_instruction(struct kvm_vcpu *vcpu)
  {
        unsigned long rip;
  
  
        /* skipping an emulated instruction also counts */
        vmx_set_interrupt_shadow(vcpu, 0);
+       return EMULATE_DONE;
+ }
+ static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+ {
+       (void)__skip_emulated_instruction(vcpu);
  }
  
  static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
@@@ -4026,7 -4036,7 +4036,7 @@@ static void ept_set_mmio_spte_mask(void
         * of an EPT paging-structure entry is 110b (write/execute).
         */
        kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
-                                  VMX_EPT_MISCONFIG_WX_VALUE);
+                                  VMX_EPT_MISCONFIG_WX_VALUE, 0);
  }
  
  #define VMX_XSS_EXIT_BITMAP 0
@@@ -4152,6 -4162,7 +4162,7 @@@ static void vmx_vcpu_reset(struct kvm_v
  
        vcpu->arch.microcode_version = 0x100000000ULL;
        vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
+       vmx->hv_deadline_tsc = -1;
        kvm_set_cr8(vcpu, 0);
  
        if (!init_event) {
@@@ -4856,41 -4867,12 +4867,12 @@@ static int handle_cpuid(struct kvm_vcp
  
  static int handle_rdmsr(struct kvm_vcpu *vcpu)
  {
-       u32 ecx = kvm_rcx_read(vcpu);
-       struct msr_data msr_info;
-       msr_info.index = ecx;
-       msr_info.host_initiated = false;
-       if (vmx_get_msr(vcpu, &msr_info)) {
-               trace_kvm_msr_read_ex(ecx);
-               kvm_inject_gp(vcpu, 0);
-               return 1;
-       }
-       trace_kvm_msr_read(ecx, msr_info.data);
-       kvm_rax_write(vcpu, msr_info.data & -1u);
-       kvm_rdx_write(vcpu, (msr_info.data >> 32) & -1u);
-       return kvm_skip_emulated_instruction(vcpu);
+       return kvm_emulate_rdmsr(vcpu);
  }
  
  static int handle_wrmsr(struct kvm_vcpu *vcpu)
  {
-       struct msr_data msr;
-       u32 ecx = kvm_rcx_read(vcpu);
-       u64 data = kvm_read_edx_eax(vcpu);
-       msr.data = data;
-       msr.index = ecx;
-       msr.host_initiated = false;
-       if (kvm_set_msr(vcpu, &msr) != 0) {
-               trace_kvm_msr_write_ex(ecx, data);
-               kvm_inject_gp(vcpu, 0);
-               return 1;
-       }
-       trace_kvm_msr_write(ecx, data);
-       return kvm_skip_emulated_instruction(vcpu);
+       return kvm_emulate_wrmsr(vcpu);
  }
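
With handle_rdmsr()/handle_wrmsr() reduced to calls into the new common helpers, the SVM interception handlers (not part of this combined diff) can be collapsed the same way; a sketch, assuming the usual vcpu_svm wrapper layout in svm.c:

static int rdmsr_interception(struct vcpu_svm *svm)
{
	return kvm_emulate_rdmsr(&svm->vcpu);
}

static int wrmsr_interception(struct vcpu_svm *svm)
{
	return kvm_emulate_wrmsr(&svm->vcpu);
}
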
  
  static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
@@@ -5227,31 -5209,33 +5209,33 @@@ emulation_error
  static void grow_ple_window(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       int old = vmx->ple_window;
+       unsigned int old = vmx->ple_window;
  
        vmx->ple_window = __grow_ple_window(old, ple_window,
                                            ple_window_grow,
                                            ple_window_max);
  
-       if (vmx->ple_window != old)
+       if (vmx->ple_window != old) {
                vmx->ple_window_dirty = true;
-       trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
+               trace_kvm_ple_window_update(vcpu->vcpu_id,
+                                           vmx->ple_window, old);
+       }
  }
  
  static void shrink_ple_window(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       int old = vmx->ple_window;
+       unsigned int old = vmx->ple_window;
  
        vmx->ple_window = __shrink_ple_window(old, ple_window,
                                              ple_window_shrink,
                                              ple_window);
  
-       if (vmx->ple_window != old)
+       if (vmx->ple_window != old) {
                vmx->ple_window_dirty = true;
-       trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
+               trace_kvm_ple_window_update(vcpu->vcpu_id,
+                                           vmx->ple_window, old);
+       }
  }
  
  /*
@@@ -5887,8 -5871,13 +5871,13 @@@ static int vmx_handle_exit(struct kvm_v
        else {
                vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
                                exit_reason);
-               kvm_queue_exception(vcpu, UD_VECTOR);
-               return 1;
+               dump_vmcs();
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror =
+                       KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+               vcpu->run->internal.ndata = 1;
+               vcpu->run->internal.data[0] = exit_reason;
+               return 0;
        }
  }
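
Instead of a guest-visible #UD, userspace now gets the raw exit reason to report. A minimal sketch of how a VMM might consume the new suberror; the helper name is hypothetical and run is assumed to be the vCPU's mmap'ed struct kvm_run:

#include <stdio.h>
#include <stdlib.h>
#include <linux/kvm.h>

/* Hypothetical helper called from a vCPU loop after KVM_RUN returns. */
static void report_internal_error(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_INTERNAL_ERROR)
		return;

	if (run->internal.suberror == KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON &&
	    run->internal.ndata >= 1)
		fprintf(stderr, "unexpected VM-exit reason: 0x%llx\n",
			(unsigned long long)run->internal.data[0]);

	abort();
}
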
  
@@@ -6615,6 -6604,9 +6604,9 @@@ static struct kvm_vcpu *vmx_create_vcpu
        unsigned long *msr_bitmap;
        int cpu;
  
+       BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
+               "struct kvm_vcpu must be at offset 0 for arch usercopy region");
        vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
        if (!vmx)
                return ERR_PTR(-ENOMEM);
@@@ -7369,10 -7361,14 +7361,14 @@@ static int vmx_update_pi_irte(struct kv
                 * irqbalance to make the interrupts single-CPU.
                 *
                 * We will support full lowest-priority interrupt later.
+                *
+                * In addition, we can only inject generic interrupts using
+                * the PI mechanism; refuse to route others through it.
                 */
  
                kvm_set_msi_irq(kvm, e, &irq);
-               if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+               if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+                   !kvm_irq_is_postable(&irq)) {
                        /*
                         * Make sure the IRTE is in remapped mode if
                         * we don't handle it in posted mode.
@@@ -7474,6 -7470,11 +7470,11 @@@ static bool vmx_need_emulation_on_page_
        return false;
  }
  
+ static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
+ {
+       return to_vmx(vcpu)->nested.vmxon;
+ }
  static __init int hardware_setup(void)
  {
        unsigned long host_bndcfgs;
@@@ -7705,7 -7706,7 +7706,7 @@@ static struct kvm_x86_ops vmx_x86_ops _
  
        .run = vmx_vcpu_run,
        .handle_exit = vmx_handle_exit,
-       .skip_emulated_instruction = skip_emulated_instruction,
+       .skip_emulated_instruction = __skip_emulated_instruction,
        .set_interrupt_shadow = vmx_set_interrupt_shadow,
        .get_interrupt_shadow = vmx_get_interrupt_shadow,
        .patch_hypercall = vmx_patch_hypercall,
        .set_nested_state = NULL,
        .get_vmcs12_pages = NULL,
        .nested_enable_evmcs = NULL,
 +      .nested_get_evmcs_version = NULL,
        .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
+       .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
  };
  
  static void vmx_cleanup_l1d_flush(void)
diff --combined arch/x86/kvm/x86.c
index 91602d310a3fbf0be916d794d0eff9c3beb1b9b2,f7cfd8e6edd175e0032bb4e25f015641fe507fb4..dfd64124356806325022e22dd8d149db6826803b
@@@ -674,8 -674,14 +674,14 @@@ static int kvm_read_nested_guest_page(s
                                       data, offset, len, access);
  }
  
+ static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
+ {
+       return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) |
+              rsvd_bits(1, 2);
+ }
  /*
-  * Load the pae pdptrs.  Return true is they are all valid.
+  * Load the pae pdptrs.  Return 1 if they are all valid, 0 otherwise.
   */
  int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
  {
        }
        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
                if ((pdpte[i] & PT_PRESENT_MASK) &&
-                   (pdpte[i] &
-                    vcpu->arch.mmu->guest_rsvd_check.rsvd_bits_mask[0][2])) {
+                   (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
                        ret = 0;
                        goto out;
                }
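
To make the reserved-bit mask concrete, assume a CPU with MAXPHYADDR = 36 (an illustrative value, not taken from this diff):

	/*
	 * pdptr_rsvd_bits() = rsvd_bits(36, 63) | rsvd_bits(5, 8) | rsvd_bits(1, 2)
	 *                   = 0xfffffff000000000 | 0x00000000000001e0 | 0x0000000000000006
	 *                   = 0xfffffff0000001e6
	 *
	 * i.e. a present PDPTE must have bits 63:36, 8:5 and 2:1 clear.
	 */
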
@@@ -1254,6 -1259,13 +1259,13 @@@ static u64 kvm_get_arch_capabilities(vo
        if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
                data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
  
+       if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+               data |= ARCH_CAP_RDCL_NO;
+       if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+               data |= ARCH_CAP_SSB_NO;
+       if (!boot_cpu_has_bug(X86_BUG_MDS))
+               data |= ARCH_CAP_MDS_NO;
        return data;
  }
  
@@@ -1351,19 -1363,23 +1363,23 @@@ void kvm_enable_efer_bits(u64 mask
  EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  
  /*
-  * Writes msr value into into the appropriate "register".
+  * Write @data into the MSR specified by @index.  Selected MSR-specific fault
+  * checks are bypassed if @host_initiated is %true.
   * Returns 0 on success, non-0 otherwise.
   * Assumes vcpu_load() was already called.
   */
- int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
+                        bool host_initiated)
  {
-       switch (msr->index) {
+       struct msr_data msr;
+       switch (index) {
        case MSR_FS_BASE:
        case MSR_GS_BASE:
        case MSR_KERNEL_GS_BASE:
        case MSR_CSTAR:
        case MSR_LSTAR:
-               if (is_noncanonical_address(msr->data, vcpu))
+               if (is_noncanonical_address(data, vcpu))
                        return 1;
                break;
        case MSR_IA32_SYSENTER_EIP:
                 * value, and that something deterministic happens if the guest
                 * invokes 64-bit SYSENTER.
                 */
-               msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu));
+               data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
        }
-       return kvm_x86_ops->set_msr(vcpu, msr);
+       msr.data = data;
+       msr.index = index;
+       msr.host_initiated = host_initiated;
+       return kvm_x86_ops->set_msr(vcpu, &msr);
  }
- EXPORT_SYMBOL_GPL(kvm_set_msr);
  
  /*
-  * Adapt set_msr() to msr_io()'s calling convention
+  * Read the MSR specified by @index into @data.  Selected MSR-specific fault
+  * checks are bypassed if @host_initiated is %true.
+  * Returns 0 on success, non-0 otherwise.
+  * Assumes vcpu_load() was already called.
   */
- static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+ static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
+                        bool host_initiated)
  {
        struct msr_data msr;
-       int r;
+       int ret;
  
        msr.index = index;
-       msr.host_initiated = true;
-       r = kvm_get_msr(vcpu, &msr);
-       if (r)
-               return r;
+       msr.host_initiated = host_initiated;
  
-       *data = msr.data;
-       return 0;
+       ret = kvm_x86_ops->get_msr(vcpu, &msr);
+       if (!ret)
+               *data = msr.data;
+       return ret;
  }
  
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
  {
-       struct msr_data msr;
+       return __kvm_get_msr(vcpu, index, data, false);
+ }
+ EXPORT_SYMBOL_GPL(kvm_get_msr);
  
-       msr.data = *data;
-       msr.index = index;
-       msr.host_initiated = true;
-       return kvm_set_msr(vcpu, &msr);
+ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+ {
+       return __kvm_set_msr(vcpu, index, data, false);
+ }
+ EXPORT_SYMBOL_GPL(kvm_set_msr);
+ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
+ {
+       u32 ecx = kvm_rcx_read(vcpu);
+       u64 data;
+       if (kvm_get_msr(vcpu, ecx, &data)) {
+               trace_kvm_msr_read_ex(ecx);
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+       }
+       trace_kvm_msr_read(ecx, data);
+       kvm_rax_write(vcpu, data & -1u);
+       kvm_rdx_write(vcpu, (data >> 32) & -1u);
+       return kvm_skip_emulated_instruction(vcpu);
+ }
+ EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
+ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
+ {
+       u32 ecx = kvm_rcx_read(vcpu);
+       u64 data = kvm_read_edx_eax(vcpu);
+       if (kvm_set_msr(vcpu, ecx, data)) {
+               trace_kvm_msr_write_ex(ecx, data);
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+       }
+       trace_kvm_msr_write(ecx, data);
+       return kvm_skip_emulated_instruction(vcpu);
+ }
+ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
+ /*
+  * Adapt set_msr() to msr_io()'s calling convention
+  */
+ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+ {
+       return __kvm_get_msr(vcpu, index, data, true);
+ }
+ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+ {
+       return __kvm_set_msr(vcpu, index, *data, true);
  }
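
The practical split after this refactor: guest-triggered accesses (emulator callbacks, RDMSR/WRMSR exits) use kvm_get_msr()/kvm_set_msr() with host_initiated == false, while the KVM_GET_MSRS/KVM_SET_MSRS ioctl path goes through do_{get,set}_msr() and therefore bypasses the selected guest-visibility checks. An illustrative fragment (the MSR choice is arbitrary):

	u64 data = 0;

	/* Guest-initiated read, e.g. from the emulator: MSR checks apply. */
	if (kvm_get_msr(vcpu, MSR_LSTAR, &data))
		kvm_inject_gp(vcpu, 0);

	/* Host-initiated write, as done for the KVM_SET_MSRS ioctl path. */
	do_set_msr(vcpu, MSR_LSTAR, &data);
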
  
  #ifdef CONFIG_X86_64
@@@ -2452,6 -2525,8 +2525,8 @@@ static void record_steal_time(struct kv
         * Doing a TLB flush here, on the guest's behalf, can avoid
         * expensive IPIs.
         */
+       trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+               vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB);
        if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
                kvm_vcpu_flush_tlb(vcpu, false);
  
@@@ -2748,18 -2823,6 +2823,6 @@@ int kvm_set_msr_common(struct kvm_vcpu 
  }
  EXPORT_SYMBOL_GPL(kvm_set_msr_common);
  
- /*
-  * Reads an msr value (of 'msr_index') into 'pdata'.
-  * Returns 0 on success, non-0 otherwise.
-  * Assumes vcpu_load() was already called.
-  */
- int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
- {
-       return kvm_x86_ops->get_msr(vcpu, msr);
- }
- EXPORT_SYMBOL_GPL(kvm_get_msr);
  static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
  {
        u64 data;
@@@ -3506,8 -3569,7 +3569,7 @@@ static int kvm_vcpu_ioctl_x86_setup_mce
        for (bank = 0; bank < bank_num; bank++)
                vcpu->arch.mce_banks[bank*4] = ~(u64)0;
  
-       if (kvm_x86_ops->setup_mce)
-               kvm_x86_ops->setup_mce(vcpu);
+       kvm_x86_ops->setup_mce(vcpu);
  out:
        return r;
  }
@@@ -5312,13 -5374,6 +5374,13 @@@ int kvm_write_guest_virt_system(struct 
        /* kvm_write_guest_virt_system can pull in tons of pages. */
        vcpu->arch.l1tf_flush_l1d = true;
  
 +      /*
 +       * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
 +       * is returned, but our callers are not ready for that and they blindly
 +       * call kvm_inject_page_fault.  Ensure that they at least do not leak
 +       * uninitialized kernel stack memory into cr2 and error code.
 +       */
 +      memset(exception, 0, sizeof(*exception));
        return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
                                           PFERR_WRITE_MASK, exception);
  }
@@@ -5377,7 -5432,7 +5439,7 @@@ static int vcpu_mmio_gva_to_gpa(struct 
         */
        if (vcpu_match_mmio_gva(vcpu, gva)
            && !permission_fault(vcpu, vcpu->arch.walk_mmu,
-                                vcpu->arch.access, 0, access)) {
+                                vcpu->arch.mmio_access, 0, access)) {
                *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
                                        (gva & (PAGE_SIZE - 1));
                trace_vcpu_match_mmio(gva, *gpa, write, false);
@@@ -5971,28 -6026,13 +6033,13 @@@ static void emulator_set_segment(struc
  static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
                            u32 msr_index, u64 *pdata)
  {
-       struct msr_data msr;
-       int r;
-       msr.index = msr_index;
-       msr.host_initiated = false;
-       r = kvm_get_msr(emul_to_vcpu(ctxt), &msr);
-       if (r)
-               return r;
-       *pdata = msr.data;
-       return 0;
+       return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
  }
  
  static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
                            u32 msr_index, u64 data)
  {
-       struct msr_data msr;
-       msr.data = data;
-       msr.index = msr_index;
-       msr.host_initiated = false;
-       return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
+       return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
  }
  
  static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
@@@ -6075,6 -6115,11 +6122,11 @@@ static void emulator_post_leave_smm(str
        kvm_smm_changed(emul_to_vcpu(ctxt));
  }
  
+ static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
+ {
+       return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
+ }
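
emulator_set_xcr() gives the instruction emulator a way to handle XSETBV; the emulate.c side (not shown in this diff) would read ECX and EDX:EAX and inject #GP on failure, roughly along these lines (a sketch, not the exact in-tree code):

static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ecx, edx;

	eax = reg_read(ctxt, VCPU_REGS_RAX);
	ecx = reg_read(ctxt, VCPU_REGS_RCX);
	edx = reg_read(ctxt, VCPU_REGS_RDX);

	if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
		return emulate_gp(ctxt, 0);

	return X86EMUL_CONTINUE;
}
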
  static const struct x86_emulate_ops emulate_ops = {
        .read_gpr            = emulator_read_gpr,
        .write_gpr           = emulator_write_gpr,
        .set_hflags          = emulator_set_hflags,
        .pre_leave_smm       = emulator_pre_leave_smm,
        .post_leave_smm      = emulator_post_leave_smm,
+       .set_xcr             = emulator_set_xcr,
  };
  
  static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@@ -6390,9 -6436,11 +6443,11 @@@ static void kvm_vcpu_do_singlestep(stru
  int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
  {
        unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
-       int r = EMULATE_DONE;
+       int r;
  
-       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       r = kvm_x86_ops->skip_emulated_instruction(vcpu);
+       if (unlikely(r != EMULATE_DONE))
+               return 0;
  
        /*
         * rflags is the old, "raw" value of the flags.  The new value has
@@@ -6528,8 -6576,16 +6583,16 @@@ int x86_emulate_instruction(struct kvm_
                        if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                                emulation_type))
                                return EMULATE_DONE;
-                       if (ctxt->have_exception && inject_emulated_exception(vcpu))
+                       if (ctxt->have_exception) {
+                               /*
+                                * #UD should just result in an emulation failure, and a trap-like
+                                * exception should not be encountered during decode.
+                                */
+                               WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
+                                            exception_type(ctxt->exception.vector) == EXCPT_TRAP);
+                               inject_emulated_exception(vcpu);
                                return EMULATE_DONE;
+                       }
                        if (emulation_type & EMULTYPE_SKIP)
                                return EMULATE_FAIL;
                        return handle_emulation_failure(vcpu, emulation_type);
                kvm_rip_write(vcpu, ctxt->_eip);
                if (ctxt->eflags & X86_EFLAGS_RF)
                        kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
+               kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
                return EMULATE_DONE;
        }
  
@@@ -6601,13 -6658,12 +6665,13 @@@ restart
                unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
                toggle_interruptibility(vcpu, ctxt->interruptibility);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 -              kvm_rip_write(vcpu, ctxt->eip);
 -              if (r == EMULATE_DONE && ctxt->tf)
 -                      kvm_vcpu_do_singlestep(vcpu, &r);
                if (!ctxt->have_exception ||
 -                  exception_type(ctxt->exception.vector) == EXCPT_TRAP)
 +                  exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
 +                      kvm_rip_write(vcpu, ctxt->eip);
 +                      if (r == EMULATE_DONE && ctxt->tf)
 +                              kvm_vcpu_do_singlestep(vcpu, &r);
                        __kvm_set_rflags(vcpu, ctxt->eflags);
 +              }
  
                /*
                 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
@@@ -9322,10 -9378,7 +9386,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
        kvm_page_track_init(kvm);
        kvm_mmu_init_vm(kvm);
  
-       if (kvm_x86_ops->vm_init)
-               return kvm_x86_ops->vm_init(kvm);
-       return 0;
+       return kvm_x86_ops->vm_init(kvm);
  }
  
  static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
@@@ -10017,7 -10070,7 +10078,7 @@@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoher
  
  bool kvm_arch_has_irq_bypass(void)
  {
-       return kvm_x86_ops->update_pi_irte != NULL;
+       return true;
  }
  
  int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
@@@ -10057,9 -10110,6 +10118,6 @@@ void kvm_arch_irq_bypass_del_producer(s
  int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
                                   uint32_t guest_irq, bool set)
  {
-       if (!kvm_x86_ops->update_pi_irte)
-               return -EINVAL;
        return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
  }
  
@@@ -10086,11 -10136,12 +10144,12 @@@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
- EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
index e621b5d45b278b6e1d03f997849422ad3a94fe97,958e2f0d2207c21defa47932d68667eb354461fa..6f50c429196de19c4be0e359172632666181951d
@@@ -8,7 -8,6 +8,7 @@@
  #include <linux/cpu.h>
  #include <linux/kvm_host.h>
  #include <kvm/arm_vgic.h>
 +#include <asm/kvm_emulate.h>
  #include <asm/kvm_mmu.h>
  #include "vgic.h"
  
@@@ -54,6 -53,7 +54,7 @@@ void kvm_vgic_early_init(struct kvm *kv
        struct vgic_dist *dist = &kvm->arch.vgic;
  
        INIT_LIST_HEAD(&dist->lpi_list_head);
+       INIT_LIST_HEAD(&dist->lpi_translation_cache);
        raw_spin_lock_init(&dist->lpi_list_lock);
  }
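
The new lpi_translation_cache caches recent (doorbell address, DevID, EventID) to vgic_irq translations so that MSI injection can avoid a full ITS table walk on the hot path. A sketch of the shape such a cache entry takes (the in-tree definition lives in vgic-its.c):

struct vgic_translation_cache_entry {
	struct list_head	entry;
	phys_addr_t		db;		/* GITS_TRANSLATER doorbell address */
	u32			devid;
	u32			eventid;
	struct vgic_irq		*irq;		/* cached translation, holds a reference */
};
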
  
@@@ -165,18 -165,12 +166,18 @@@ static int kvm_vgic_dist_init(struct kv
                irq->vcpu = NULL;
                irq->target_vcpu = vcpu0;
                kref_init(&irq->refcount);
 -              if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
 +              switch (dist->vgic_model) {
 +              case KVM_DEV_TYPE_ARM_VGIC_V2:
                        irq->targets = 0;
                        irq->group = 0;
 -              } else {
 +                      break;
 +              case KVM_DEV_TYPE_ARM_VGIC_V3:
                        irq->mpidr = 0;
                        irq->group = 1;
 +                      break;
 +              default:
 +                      kfree(dist->spis);
 +                      return -EINVAL;
                }
        }
        return 0;
@@@ -199,7 -193,6 +200,6 @@@ int kvm_vgic_vcpu_init(struct kvm_vcpu 
        int i;
  
        vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
-       vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF;
  
        INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
        raw_spin_lock_init(&vgic_cpu->ap_list_lock);
                irq->intid = i;
                irq->vcpu = NULL;
                irq->target_vcpu = vcpu;
 -              irq->targets = 1U << vcpu->vcpu_id;
                kref_init(&irq->refcount);
                if (vgic_irq_is_sgi(i)) {
                        /* SGIs */
                        /* PPIs */
                        irq->config = VGIC_CONFIG_LEVEL;
                }
 -
 -              if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
 -                      irq->group = 1;
 -              else
 -                      irq->group = 0;
        }
  
        if (!irqchip_in_kernel(vcpu->kvm))
@@@ -287,23 -286,15 +287,24 @@@ int vgic_init(struct kvm *kvm
  
                for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
                        struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
 -                      if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
 +                      switch (dist->vgic_model) {
 +                      case KVM_DEV_TYPE_ARM_VGIC_V3:
                                irq->group = 1;
 -                      else
 +                              irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
 +                              break;
 +                      case KVM_DEV_TYPE_ARM_VGIC_V2:
                                irq->group = 0;
 +                              irq->targets = 1U << idx;
 +                              break;
 +                      default:
 +                              ret = -EINVAL;
 +                              goto out;
 +                      }
                }
        }
  
        if (vgic_has_its(kvm)) {
+               vgic_lpi_translation_cache_init(kvm);
                ret = vgic_v4_init(kvm);
                if (ret)
                        goto out;
@@@ -345,6 -336,9 +346,9 @@@ static void kvm_vgic_dist_destroy(struc
                INIT_LIST_HEAD(&dist->rd_regions);
        }
  
+       if (vgic_has_its(kvm))
+               vgic_lpi_translation_cache_destroy(kvm);
        if (vgic_supports_direct_msis(kvm))
                vgic_v4_teardown(kvm);
  }
@@@ -515,7 -509,7 +519,7 @@@ int kvm_vgic_hyp_init(void
                break;
        default:
                ret = -ENODEV;
-       };
+       }
  
        if (ret)
                return ret;
index b00aa304c260eb412210cf26aa2b011dc51dfa88,e67945020b4505e3fcec233868732f237d434b13..621cc168fe3f706306b96dda5a591c91d0c8ae70
@@@ -184,10 -184,7 +184,10 @@@ void vgic_v2_populate_lr(struct kvm_vcp
                if (vgic_irq_is_sgi(irq->intid)) {
                        u32 src = ffs(irq->source);
  
 -                      BUG_ON(!src);
 +                      if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
 +                                         irq->intid))
 +                              return;
 +
                        val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
                        irq->source &= ~(1 << (src - 1));
                        if (irq->source) {
@@@ -357,10 -354,11 +357,11 @@@ out
  DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap);
  
  /**
-  * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
-  * @node:     pointer to the DT node
+  * vgic_v2_probe - probe for a VGICv2 compatible interrupt controller
+  * @info:     pointer to the GIC description
   *
-  * Returns 0 if a GICv2 has been found, returns an error code otherwise
+  * Returns 0 if the VGICv2 has been probed successfully, or an error code
+  * otherwise
   */
  int vgic_v2_probe(const struct gic_kvm_info *info)
  {
index a4ad431c92a912d2b3a9330db301059858ebc62b,30955d162a01b09775caf5a48235a0dcc03f97ad..8d69f007dd0c974305199b6aa71811428b943fe3
@@@ -167,10 -167,7 +167,10 @@@ void vgic_v3_populate_lr(struct kvm_vcp
                    model == KVM_DEV_TYPE_ARM_VGIC_V2) {
                        u32 src = ffs(irq->source);
  
 -                      BUG_ON(!src);
 +                      if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
 +                                         irq->intid))
 +                              return;
 +
                        val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
                        irq->source &= ~(1 << (src - 1));
                        if (irq->source) {
@@@ -573,10 -570,11 +573,11 @@@ static int __init early_gicv4_enable(ch
  early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
  
  /**
-  * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
-  * @node:     pointer to the DT node
+  * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
+  * @info:     pointer to the GIC description
   *
-  * Returns 0 if a GICv3 has been found, returns an error code otherwise
+  * Returns 0 if the VGICv3 has been probed successfully, or an error code
+  * otherwise
   */
  int vgic_v3_probe(const struct gic_kvm_info *info)
  {
diff --combined virt/kvm/arm/vgic/vgic.c
index e7bde65ba67c0802582883c195052b96cc427caf,846bb680fd4541d7fcbf631fc8562de1be6b5735..45a870cb63f584ae3de99504d60ab1a1b4c35926
@@@ -119,6 -119,22 +119,22 @@@ static void vgic_irq_release(struct kre
  {
  }
  
+ /*
+  * Drop the refcount on the LPI. Must be called with lpi_list_lock held.
+  */
+ void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
+ {
+       struct vgic_dist *dist = &kvm->arch.vgic;
+       if (!kref_put(&irq->refcount, vgic_irq_release))
+               return;
+       list_del(&irq->lpi_list);
+       dist->lpi_list_count--;
+       kfree(irq);
+ }
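
Factoring out a *_locked variant lets code that already holds lpi_list_lock drop LPI references without retaking the lock. A sketch of such a caller, assuming a cache-entry shape like the one sketched for the LPI translation cache above (the real user is the cache invalidation path in vgic-its.c):

	struct vgic_dist *dist = &kvm->arch.vgic;
	struct vgic_translation_cache_entry *cte;
	unsigned long flags;

	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);

	list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
		if (!cte->irq)		/* unused slot */
			continue;

		__vgic_put_lpi_locked(kvm, cte->irq);
		cte->irq = NULL;
	}

	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
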
  void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
  {
        struct vgic_dist *dist = &kvm->arch.vgic;
                return;
  
        raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-       if (!kref_put(&irq->refcount, vgic_irq_release)) {
-               raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
-               return;
-       };
-       list_del(&irq->lpi_list);
-       dist->lpi_list_count--;
+       __vgic_put_lpi_locked(kvm, irq);
        raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
-       kfree(irq);
  }
  
  void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
@@@ -254,13 -262,6 +262,13 @@@ static int vgic_irq_cmp(void *priv, str
        bool penda, pendb;
        int ret;
  
 +      /*
 +       * list_sort may call this function with the same element when
 +       * the list is fairly long.
 +       */
 +      if (unlikely(irqa == irqb))
 +              return 0;
 +
        raw_spin_lock(&irqa->irq_lock);
        raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);