diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c243b81e3c74b56bb69d7d26e692ac4181d73320..f71500af1f813245bb12092665ac7dea3ba5f24f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -872,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
 
        kvm_x86_ops->set_efer(vcpu, efer);
 
-       vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
-
        /* Update reserved bits */
        if ((efer ^ old_efer) & EFER_NX)
                kvm_mmu_reset_context(vcpu);
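
Note: the base_role.nxe update removed above is presumably recomputed when the MMU context is rebuilt, which is why the kvm_mmu_reset_context() call on an EFER.NX change is kept. A minimal sketch of that assumption (the placement and exact form are illustrative, not taken from this diff):

/* Sketch only: assumed to happen in the shadow-MMU context setup. */
static void example_init_shadow_mmu_role(struct kvm_vcpu *vcpu,
					 struct kvm_mmu *context)
{
	/* is_nx() tests EFER.NX; fold it into the single role bit. */
	context->base_role.nxe = is_nx(vcpu) ? 1 : 0;
}
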
@@ -1881,6 +1879,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        u64 data = msr_info->data;
 
        switch (msr) {
+       case MSR_AMD64_NB_CFG:
+       case MSR_IA32_UCODE_REV:
+       case MSR_IA32_UCODE_WRITE:
+       case MSR_VM_HSAVE_PA:
+       case MSR_AMD64_PATCH_LOADER:
+       case MSR_AMD64_BU_CFG2:
+               break;
+
        case MSR_EFER:
                return set_efer(vcpu, data);
        case MSR_K7_HWCR:
@@ -1900,8 +1906,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                }
                break;
-       case MSR_AMD64_NB_CFG:
-               break;
        case MSR_IA32_DEBUGCTLMSR:
                if (!data) {
                        /* We support the non-activated case already */
@@ -1914,11 +1918,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
                            __func__, data);
                break;
-       case MSR_IA32_UCODE_REV:
-       case MSR_IA32_UCODE_WRITE:
-       case MSR_VM_HSAVE_PA:
-       case MSR_AMD64_PATCH_LOADER:
-               break;
        case 0x200 ... 0x2ff:
                return set_msr_mtrr(vcpu, msr, data);
        case MSR_IA32_APICBASE:
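
Note: all of the MSR cases hoisted to the top of the switch above fall through to a bare break, i.e. guest writes to these registers are accepted and silently ignored; the hunk below additionally makes MSR_AMD64_BU_CFG2 read back as 0. A stand-alone sketch of the write side, using a hypothetical helper name:

/* Sketch only: msr_write_is_ignored() is a hypothetical helper, not kernel code. */
static bool msr_write_is_ignored(u32 msr)
{
	switch (msr) {
	case MSR_AMD64_NB_CFG:
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_UCODE_WRITE:
	case MSR_VM_HSAVE_PA:
	case MSR_AMD64_PATCH_LOADER:
	case MSR_AMD64_BU_CFG2:
		return true;	/* write accepted, value dropped */
	default:
		return false;
	}
}
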
@@ -2253,6 +2252,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_K8_INT_PENDING_MSG:
        case MSR_AMD64_NB_CFG:
        case MSR_FAM10H_MMIO_CONF_BASE:
+       case MSR_AMD64_BU_CFG2:
                data = 0;
                break;
        case MSR_P6_PERFCTR0:
@@ -2520,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext)
                r = KVM_MAX_VCPUS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
-               r = KVM_MEMORY_SLOTS;
+               r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_PV_MMU:    /* obsolete */
                r = 0;
@@ -3272,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
                return -EINVAL;
 
        mutex_lock(&kvm->slots_lock);
-       spin_lock(&kvm->mmu_lock);
 
        kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
        kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
-       spin_unlock(&kvm->mmu_lock);
        mutex_unlock(&kvm->slots_lock);
        return 0;
 }
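
Note: with the spin_lock/spin_unlock pair dropped here (and around the same call in kvm_arch_commit_memory_region further down), kvm_mmu_change_mmu_pages() is presumably expected to take mmu_lock itself. A minimal sketch of that assumption, not taken from this diff:

/* Sketch only: the callee is assumed to own the locking now. */
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
{
	spin_lock(&kvm->mmu_lock);

	/* ... free or allocate shadow pages until the goal is met ... */

	kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
	spin_unlock(&kvm->mmu_lock);
}
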
@@ -3437,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
        mutex_lock(&kvm->slots_lock);
 
        r = -EINVAL;
-       if (log->slot >= KVM_MEMORY_SLOTS)
+       if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;
 
        memslot = id_to_memslot(kvm->memslots, log->slot);
@@ -4493,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
        kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
        *selector = var.selector;
 
-       if (var.unusable)
+       if (var.unusable) {
+               memset(desc, 0, sizeof(*desc));
                return false;
+       }
 
        if (var.g)
                var.limit >>= 12;
@@ -4755,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
        return r;
 }
 
-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
+static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
+                                 bool write_fault_to_shadow_pgtable)
 {
-       gpa_t gpa;
+       gpa_t gpa = cr2;
        pfn_t pfn;
 
-       if (tdp_enabled)
-               return false;
-
-       /*
-        * if emulation was due to access to shadowed page table
-        * and it failed try to unshadow page and re-enter the
-        * guest to let CPU execute the instruction.
-        */
-       if (kvm_mmu_unprotect_page_virt(vcpu, gva))
-               return true;
-
-       gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
+       if (!vcpu->arch.mmu.direct_map) {
+               /*
+                * Write permission should be allowed since only
+                * write accesses need to be emulated.
+                */
+               gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
 
-       if (gpa == UNMAPPED_GVA)
-               return true; /* let cpu generate fault */
+               /*
+                * If the mapping is invalid in the guest, let the CPU
+                * retry it to generate the fault.
+                */
+               if (gpa == UNMAPPED_GVA)
+                       return true;
+       }
 
        /*
         * Do not retry the unhandleable instruction if it faults on the
@@ -4783,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
         * instruction -> ...
         */
        pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
-       if (!is_error_noslot_pfn(pfn)) {
-               kvm_release_pfn_clean(pfn);
+
+       /*
+        * If the instruction failed on the error pfn, it cannot be
+        * fixed; report the error to userspace.
+        */
+       if (is_error_noslot_pfn(pfn))
+               return false;
+
+       kvm_release_pfn_clean(pfn);
+
+       /* Instructions are emulated well on the direct mmu. */
+       if (vcpu->arch.mmu.direct_map) {
+               unsigned int indirect_shadow_pages;
+
+               spin_lock(&vcpu->kvm->mmu_lock);
+               indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
+               spin_unlock(&vcpu->kvm->mmu_lock);
+
+               if (indirect_shadow_pages)
+                       kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
                return true;
        }
 
-       return false;
+       /*
+        * If emulation was due to an access to a shadowed page table
+        * and it failed, try to unshadow the page and re-enter the
+        * guest to let the CPU execute the instruction.
+        */
+       kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+
+       /*
+        * If the access faulted on its own page table, it cannot
+        * be fixed by unprotecting the shadow page and should
+        * be reported to userspace.
+        */
+       return !write_fault_to_shadow_pgtable;
 }
 
 static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
@@ -4830,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
        if (!vcpu->arch.mmu.direct_map)
                gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
 
-       kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+       kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
 
        return true;
 }
@@ -4847,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
        int r;
        struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
        bool writeback = true;
+       bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
 
+       /*
+        * Clear write_fault_to_shadow_pgtable here to ensure it is
+        * never reused.
+        */
+       vcpu->arch.write_fault_to_shadow_pgtable = false;
        kvm_clear_exception_queue(vcpu);
 
        if (!(emulation_type & EMULTYPE_NO_DECODE)) {
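
Note: write_fault_to_shadow_pgtable is snapshotted into a local and immediately cleared so that a nested round of emulation cannot pick up a stale value; the flag is presumably set on the page-fault path when a write fault lands on the guest's own page tables. A hypothetical sketch of that producer side (the helper name is illustrative, not from this diff):

/* Hypothetical helper, stubbed so the sketch is self-contained. */
static bool write_hits_guest_page_table(struct kvm_vcpu *vcpu, gva_t addr)
{
	return false;
}

/* Sketch only: assumed producer of the flag consumed above. */
static void example_record_write_fault(struct kvm_vcpu *vcpu, gva_t addr,
				       bool write_fault)
{
	vcpu->arch.write_fault_to_shadow_pgtable =
		write_fault && write_hits_guest_page_table(vcpu, addr);
}
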
@@ -4866,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                if (r != EMULATION_OK)  {
                        if (emulation_type & EMULTYPE_TRAP_UD)
                                return EMULATE_FAIL;
-                       if (reexecute_instruction(vcpu, cr2))
+                       if (reexecute_instruction(vcpu, cr2,
+                                                 write_fault_to_spt))
                                return EMULATE_DONE;
                        if (emulation_type & EMULTYPE_SKIP)
                                return EMULATE_FAIL;
@@ -4896,7 +4934,7 @@ restart:
                return EMULATE_DONE;
 
        if (r == EMULATION_FAILED) {
-               if (reexecute_instruction(vcpu, cr2))
+               if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
                        return EMULATE_DONE;
 
                return handle_emulation_failure(vcpu);
@@ -5539,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
                        vcpu->arch.nmi_injected = true;
                        kvm_x86_ops->set_nmi(vcpu);
                }
-       } else if (kvm_cpu_has_interrupt(vcpu)) {
+       } else if (kvm_cpu_has_injectable_intr(vcpu)) {
                if (kvm_x86_ops->interrupt_allowed(vcpu)) {
                        kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
                                            false);
@@ -5607,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
+static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
+{
+       u64 eoi_exit_bitmap[4];
+
+       memset(eoi_exit_bitmap, 0, 32);
+
+       kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
+       kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
        int r;
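
Note: the update_eoi_exitmap() helper added above recomputes the EOI-exit bitmap from the IOAPIC state and hands it to the vendor code; it runs whenever KVM_REQ_EOIBITMAP is raised (see the request check in the next hunk). The hard-coded 32 equals sizeof(eoi_exit_bitmap) (4 x u64, one bit per vector); a sizeof-based form would arguably be clearer:

	/* Equivalent zeroing that stays correct if the array size ever changes. */
	memset(eoi_exit_bitmap, 0, sizeof(eoi_exit_bitmap));
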
@@ -5660,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                        kvm_handle_pmu_event(vcpu);
                if (kvm_check_request(KVM_REQ_PMI, vcpu))
                        kvm_deliver_pmi(vcpu);
+               if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
+                       update_eoi_exitmap(vcpu);
        }
 
        if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5668,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                /* enable NMI/IRQ window open exits if needed */
                if (vcpu->arch.nmi_pending)
                        kvm_x86_ops->enable_nmi_window(vcpu);
-               else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+               else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
                        kvm_x86_ops->enable_irq_window(vcpu);
 
                if (kvm_lapic_enabled(vcpu)) {
+                       /*
+                        * Update architecture-specific hints for APIC
+                        * virtual interrupt delivery.
+                        */
+                       if (kvm_x86_ops->hwapic_irr_update)
+                               kvm_x86_ops->hwapic_irr_update(vcpu,
+                                       kvm_lapic_find_highest_irr(vcpu));
                        update_cr8_intercept(vcpu);
                        kvm_lapic_sync_to_vapic(vcpu);
                }
@@ -6851,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                struct kvm_memory_slot *memslot,
                                struct kvm_memory_slot old,
                                struct kvm_userspace_memory_region *mem,
-                               int user_alloc)
+                               bool user_alloc)
 {
        int npages = memslot->npages;
-       int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
-
-       /* Prevent internal slot pages from being moved by fork()/COW. */
-       if (memslot->id >= KVM_MEMORY_SLOTS)
-               map_flags = MAP_SHARED | MAP_ANONYMOUS;
 
-       /*To keep backward compatibility with older userspace,
-        *x86 needs to handle !user_alloc case.
+       /*
+        * Only private memory slots need to be mapped here since
+        * the KVM_SET_MEMORY_REGION ioctl is no longer supported.
         */
-       if (!user_alloc) {
-               if (npages && !old.npages) {
-                       unsigned long userspace_addr;
+       if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
+               unsigned long userspace_addr;
 
-                       userspace_addr = vm_mmap(NULL, 0,
-                                                npages * PAGE_SIZE,
-                                                PROT_READ | PROT_WRITE,
-                                                map_flags,
-                                                0);
+               /*
+                * MAP_SHARED to prevent internal slot pages from being moved
+                * by fork()/COW.
+                */
+               userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
+                                        PROT_READ | PROT_WRITE,
+                                        MAP_SHARED | MAP_ANONYMOUS, 0);
 
-                       if (IS_ERR((void *)userspace_addr))
-                               return PTR_ERR((void *)userspace_addr);
+               if (IS_ERR((void *)userspace_addr))
+                       return PTR_ERR((void *)userspace_addr);
 
-                       memslot->userspace_addr = userspace_addr;
-               }
+               memslot->userspace_addr = userspace_addr;
        }
 
-
        return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
                                struct kvm_userspace_memory_region *mem,
                                struct kvm_memory_slot old,
-                               int user_alloc)
+                               bool user_alloc)
 {
 
        int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
 
-       if (!user_alloc && !old.user_alloc && old.npages && !npages) {
+       if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
                int ret;
 
                ret = vm_munmap(old.userspace_addr,
@@ -6906,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
        if (!kvm->arch.n_requested_mmu_pages)
                nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
 
-       spin_lock(&kvm->mmu_lock);
        if (nr_mmu_pages)
                kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
-       kvm_mmu_slot_remove_write_access(kvm, mem->slot);
-       spin_unlock(&kvm->mmu_lock);
+       /*
+        * Write protect all pages for dirty logging.
+        * Existing largepage mappings are destroyed here and new ones will
+        * not be created until the end of the logging.
+        */
+       if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
+               kvm_mmu_slot_remove_write_access(kvm, mem->slot);
        /*
         * If memory slot is created, or moved, we need to clear all
         * mmio sptes.