KVM: MMU: avoid pte_list_desc running out in kvm_mmu_pte_write
authorXiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Thu, 22 Sep 2011 08:53:17 +0000 (16:53 +0800)
committerAvi Kivity <avi@redhat.com>
Tue, 27 Dec 2011 09:16:47 +0000 (11:16 +0200)
kvm_mmu_pte_write is unsafe since we need to alloc pte_list_desc in the
function when spte is prefetched, unfortunately, we can not know how many
spte need to be prefetched on this path, that means we can use out of the
free  pte_list_desc object in the cache, and BUG_ON() is triggered, also some
path does not fill the cache, such as INS instruction emulated that does not
trigger page fault

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
arch/x86/kvm/mmu.c

index f1b36cf3e3d0aff4ee1d587deb044a9618d51869..232c5a30ddc8380c679ed1ad506fb6e6cb537021 100644 (file)
@@ -593,6 +593,11 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
        return 0;
 }
 
+static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *cache)
+{
+       return cache->nobjs;
+}
+
 static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc,
                                  struct kmem_cache *cache)
 {
@@ -970,6 +975,14 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
        return &linfo->rmap_pde;
 }
 
+static bool rmap_can_add(struct kvm_vcpu *vcpu)
+{
+       struct kvm_mmu_memory_cache *cache;
+
+       cache = &vcpu->arch.mmu_pte_list_desc_cache;
+       return mmu_memory_cache_free_objects(cache);
+}
+
 static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
        struct kvm_mmu_page *sp;
@@ -3586,6 +3599,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                break;
        }
 
+       /*
+        * No need to care whether allocation memory is successful
+        * or not since pte prefetch is skiped if it does not have
+        * enough objects in the cache.
+        */
+       mmu_topup_memory_caches(vcpu);
        spin_lock(&vcpu->kvm->mmu_lock);
        if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
                gentry = 0;
@@ -3656,7 +3675,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                        mmu_page_zap_pte(vcpu->kvm, sp, spte);
                        if (gentry &&
                              !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
-                             & mask.word))
+                             & mask.word) && rmap_can_add(vcpu))
                                mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
                        if (!remote_flush && need_remote_flush(entry, *spte))
                                remote_flush = true;
@@ -3717,10 +3736,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
                goto out;
        }
 
-       r = mmu_topup_memory_caches(vcpu);
-       if (r)
-               goto out;
-
        er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
 
        switch (er) {