KVM: MMU: remove 'clear_unsync' parameter
[linux-2.6-block.git] / arch / x86 / kvm / mmu.c
index 908ea5464a518097958083156eb5b0b94f4d021c..3db0cd4b13d7683bdb007367572710679b86950b 100644 (file)
  *
  */
 
+#include "irq.h"
 #include "mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
+#include "x86.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -194,7 +196,6 @@ static struct percpu_counter kvm_total_used_mmu_pages;
 
 static u64 __read_mostly shadow_trap_nonpresent_pte;
 static u64 __read_mostly shadow_notrap_nonpresent_pte;
-static u64 __read_mostly shadow_base_present_pte;
 static u64 __read_mostly shadow_nx_mask;
 static u64 __read_mostly shadow_x_mask;        /* mutual exclusive with nx_mask */
 static u64 __read_mostly shadow_user_mask;
@@ -213,12 +214,6 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
 
-void kvm_mmu_set_base_ptes(u64 base_pte)
-{
-       shadow_base_present_pte = base_pte;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
-
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
                u64 dirty_mask, u64 nx_mask, u64 x_mask)
 {
@@ -720,7 +715,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
        }
 }
 
-static void set_spte_track_bits(u64 *sptep, u64 new_spte)
+static int set_spte_track_bits(u64 *sptep, u64 new_spte)
 {
        pfn_t pfn;
        u64 old_spte = *sptep;
@@ -731,19 +726,20 @@ static void set_spte_track_bits(u64 *sptep, u64 new_spte)
                old_spte = __xchg_spte(sptep, new_spte);
 
        if (!is_rmap_spte(old_spte))
-               return;
+               return 0;
 
        pfn = spte_to_pfn(old_spte);
        if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
                kvm_set_pfn_accessed(pfn);
        if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
                kvm_set_pfn_dirty(pfn);
+       return 1;
 }
 
 static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
 {
-       set_spte_track_bits(sptep, new_spte);
-       rmap_remove(kvm, sptep);
+       if (set_spte_track_bits(sptep, new_spte))
+               rmap_remove(kvm, sptep);
 }
 
 static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
@@ -1160,7 +1156,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
 }
 
 static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
-                              struct kvm_mmu_page *sp, bool clear_unsync)
+                              struct kvm_mmu_page *sp)
 {
        return 1;
 }
@@ -1290,7 +1286,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
        if (clear_unsync)
                kvm_unlink_unsync_page(vcpu->kvm, sp);
 
-       if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) {
+       if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
                kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
                return 1;
        }
@@ -1331,12 +1327,12 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu,  gfn_t gfn)
                        continue;
 
                WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
+               kvm_unlink_unsync_page(vcpu->kvm, s);
                if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
-                       (vcpu->arch.mmu.sync_page(vcpu, s, true))) {
+                       (vcpu->arch.mmu.sync_page(vcpu, s))) {
                        kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
                        continue;
                }
-               kvm_unlink_unsync_page(vcpu->kvm, s);
                flush = true;
        }
 
@@ -1962,9 +1958,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                    unsigned pte_access, int user_fault,
                    int write_fault, int dirty, int level,
                    gfn_t gfn, pfn_t pfn, bool speculative,
-                   bool can_unsync, bool reset_host_protection)
+                   bool can_unsync, bool host_writable)
 {
-       u64 spte;
+       u64 spte, entry = *sptep;
        int ret = 0;
 
        /*
@@ -1972,7 +1968,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
         * whether the guest actually used the pte (in order to detect
         * demand paging).
         */
-       spte = shadow_base_present_pte;
+       spte = PT_PRESENT_MASK;
        if (!speculative)
                spte |= shadow_accessed_mask;
        if (!dirty)
@@ -1989,7 +1985,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
                        kvm_is_mmio_pfn(pfn));
 
-       if (reset_host_protection)
+       if (host_writable)
                spte |= SPTE_HOST_WRITEABLE;
 
        spte |= (u64)pfn << PAGE_SHIFT;
@@ -2035,6 +2031,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 set_pte:
        update_spte(sptep, spte);
+       /*
+        * If we overwrite a writable spte with a read-only one we
+        * should flush remote TLBs. Otherwise rmap_write_protect
+        * will find a read-only spte, even though the writable spte
+        * might be cached on a CPU's TLB.
+        */
+       if (is_writable_pte(entry) && !is_writable_pte(*sptep))
+               kvm_flush_remote_tlbs(vcpu->kvm);
 done:
        return ret;
 }
@@ -2044,7 +2048,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                         int user_fault, int write_fault, int dirty,
                         int *ptwrite, int level, gfn_t gfn,
                         pfn_t pfn, bool speculative,
-                        bool reset_host_protection)
+                        bool host_writable)
 {
        int was_rmapped = 0;
        int rmap_count;
@@ -2079,7 +2083,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
        if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
                      dirty, level, gfn, pfn, speculative, true,
-                     reset_host_protection)) {
+                     host_writable)) {
                if (write_fault)
                        *ptwrite = 1;
                kvm_mmu_flush_tlb(vcpu);
@@ -2210,7 +2214,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-                       int level, gfn_t gfn, pfn_t pfn)
+                       int map_writable, int level, gfn_t gfn, pfn_t pfn)
 {
        struct kvm_shadow_walk_iterator iterator;
        struct kvm_mmu_page *sp;
@@ -2219,9 +2223,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 
        for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
                if (iterator.level == level) {
-                       mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+                       unsigned pte_access = ACC_ALL;
+
+                       if (!map_writable)
+                               pte_access &= ~ACC_WRITE_MASK;
+                       mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
                                     0, write, 1, &pt_write,
-                                    level, gfn, pfn, false, true);
+                                    level, gfn, pfn, false, map_writable);
                        direct_pte_prefetch(vcpu, iterator.sptep);
                        ++vcpu->stat.pf_fixed;
                        break;
@@ -2276,12 +2284,17 @@ static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
        return 1;
 }
 
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
+                        gva_t gva, pfn_t *pfn, bool write, bool *writable);
+
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
+                        bool no_apf)
 {
        int r;
        int level;
        pfn_t pfn;
        unsigned long mmu_seq;
+       bool map_writable;
 
        level = mapping_level(vcpu, gfn);
 
@@ -2296,7 +2309,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
-       pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+       if (try_async_pf(vcpu, no_apf, gfn, v, &pfn, write, &map_writable))
+               return 0;
 
        /* mmio */
        if (is_error_pfn(pfn))
@@ -2306,7 +2321,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
        if (mmu_notifier_retry(vcpu, mmu_seq))
                goto out_unlock;
        kvm_mmu_free_some_pages(vcpu);
-       r = __direct_map(vcpu, v, write, level, gfn, pfn);
+       r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn);
        spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -2393,7 +2408,8 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                        ASSERT(!VALID_PAGE(root));
                        spin_lock(&vcpu->kvm->mmu_lock);
                        kvm_mmu_free_some_pages(vcpu);
-                       sp = kvm_mmu_get_page(vcpu, i << 30, i << 30,
+                       sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
+                                             i << 30,
                                              PT32_ROOT_LEVEL, 1, ACC_ALL,
                                              NULL);
                        root = __pa(sp->spt);
@@ -2528,6 +2544,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
                hpa_t root = vcpu->arch.mmu.root_hpa;
                sp = page_header(root);
                mmu_sync_children(vcpu, sp);
+               trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
                return;
        }
        for (i = 0; i < 4; ++i) {
@@ -2566,7 +2583,7 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
 }
 
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
-                               u32 error_code)
+                               u32 error_code, bool no_apf)
 {
        gfn_t gfn;
        int r;
@@ -2582,17 +2599,65 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
        gfn = gva >> PAGE_SHIFT;
 
        return nonpaging_map(vcpu, gva & PAGE_MASK,
-                            error_code & PFERR_WRITE_MASK, gfn);
+                            error_code & PFERR_WRITE_MASK, gfn, no_apf);
+}
+
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+{
+       struct kvm_arch_async_pf arch;
+       arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+       arch.gfn = gfn;
+       arch.direct_map = vcpu->arch.mmu.direct_map;
+
+       return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+}
+
+static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+{
+       if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+                    kvm_event_needs_reinjection(vcpu)))
+               return false;
+
+       return kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
-                               u32 error_code)
+static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
+                        gva_t gva, pfn_t *pfn, bool write, bool *writable)
+{
+       bool async;
+
+       *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
+
+       if (!async)
+               return false; /* *pfn has correct page already */
+
+       put_page(pfn_to_page(*pfn));
+
+       if (!no_apf && can_do_async_pf(vcpu)) {
+               trace_kvm_try_async_get_page(gva, gfn);
+               if (kvm_find_async_pf_gfn(vcpu, gfn)) {
+                       trace_kvm_async_pf_doublefault(gva, gfn);
+                       kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+                       return true;
+               } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
+                       return true;
+       }
+
+       *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
+
+       return false;
+}
+
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
+                         bool no_apf)
 {
        pfn_t pfn;
        int r;
        int level;
        gfn_t gfn = gpa >> PAGE_SHIFT;
        unsigned long mmu_seq;
+       int write = error_code & PFERR_WRITE_MASK;
+       bool map_writable;
 
        ASSERT(vcpu);
        ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -2607,14 +2672,18 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
-       pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+       if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn, write, &map_writable))
+               return 0;
+
+       /* mmio */
        if (is_error_pfn(pfn))
                return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
        spin_lock(&vcpu->kvm->mmu_lock);
        if (mmu_notifier_retry(vcpu, mmu_seq))
                goto out_unlock;
        kvm_mmu_free_some_pages(vcpu);
-       r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
+       r = __direct_map(vcpu, gpa, write, map_writable,
                         level, gfn, pfn);
        spin_unlock(&vcpu->kvm->mmu_lock);
 
@@ -3267,7 +3336,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
        int r;
        enum emulation_result er;
 
-       r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
+       r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
        if (r < 0)
                goto out;