KVM: convert custom marker based tracing to event traces
[linux-2.6-block.git] / arch / x86 / kvm / vmx.c
index 6ee929255a3d3f9728db5f2db721c9024deadb13..c6256b98f078ae265a5ed14b7208779dcf497c1f 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/moduleparam.h>
+#include <linux/ftrace_event.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -34,6 +35,8 @@
 #include <asm/virtext.h>
 #include <asm/mce.h>
 
+#include "trace.h"
+
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
 MODULE_AUTHOR("Qumranet");
@@ -270,6 +273,26 @@ static inline bool cpu_has_vmx_flexpriority(void)
                cpu_has_vmx_virtualize_apic_accesses();
 }
 
+/*
+ * True when VMX_EPT_EXECUTE_ONLY_BIT is set in the cached EPT capability
+ * word (vmx_capability.ept).
+ */
+static inline bool cpu_has_vmx_ept_execute_only(void)
+{
+       return (vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT) != 0;
+}
+
+/*
+ * True when VMX_EPTP_UC_BIT is set in the cached EPT capability word
+ * (vmx_capability.ept).
+ */
+static inline bool cpu_has_vmx_eptp_uncacheable(void)
+{
+       return (vmx_capability.ept & VMX_EPTP_UC_BIT) != 0;
+}
+
+/*
+ * True when VMX_EPTP_WB_BIT is set in the cached EPT capability word
+ * (vmx_capability.ept).
+ */
+static inline bool cpu_has_vmx_eptp_writeback(void)
+{
+       return (vmx_capability.ept & VMX_EPTP_WB_BIT) != 0;
+}
+
+/*
+ * True when VMX_EPT_2MB_PAGE_BIT is set in the cached EPT capability word
+ * (vmx_capability.ept).
+ */
+static inline bool cpu_has_vmx_ept_2m_page(void)
+{
+       return (vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT) != 0;
+}
+
 static inline int cpu_has_vmx_invept_individual_addr(void)
 {
        return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
@@ -1361,6 +1384,9 @@ static __init int hardware_setup(void)
        if (!cpu_has_vmx_tpr_shadow())
                kvm_x86_ops->update_cr8_intercept = NULL;
 
+       if (enable_ept && !cpu_has_vmx_ept_2m_page())
+               kvm_disable_largepages();
+
        return alloc_kvm_area();
 }
 
@@ -2527,7 +2553,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
        uint32_t intr;
        int irq = vcpu->arch.interrupt.nr;
 
-       KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
+       trace_kvm_inj_virq(irq);
 
        ++vcpu->stat.irq_injections;
        if (vmx->rmode.vm86_active) {
@@ -2728,8 +2754,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                if (enable_ept)
                        BUG();
                cr2 = vmcs_readl(EXIT_QUALIFICATION);
-               KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
-                           (u32)((u64)cr2 >> 32), handler);
+               trace_kvm_page_fault(cr2, error_code);
+
                if (kvm_event_needs_reinjection(vcpu))
                        kvm_mmu_unprotect_page_virt(vcpu, cr2);
                return kvm_mmu_page_fault(vcpu, cr2, error_code);
@@ -2776,7 +2802,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
                                     struct kvm_run *kvm_run)
 {
        ++vcpu->stat.irq_exits;
-       KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler);
        return 1;
 }
 
@@ -2824,7 +2849,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 
 static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-       unsigned long exit_qualification;
+       unsigned long exit_qualification, val;
        int cr;
        int reg;
 
@@ -2833,21 +2858,19 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        reg = (exit_qualification >> 8) & 15;
        switch ((exit_qualification >> 4) & 3) {
        case 0: /* mov to cr */
-               KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr,
-                           (u32)kvm_register_read(vcpu, reg),
-                           (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
-                           handler);
+               val = kvm_register_read(vcpu, reg);
+               trace_kvm_cr_write(cr, val);
                switch (cr) {
                case 0:
-                       kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg));
+                       kvm_set_cr0(vcpu, val);
                        skip_emulated_instruction(vcpu);
                        return 1;
                case 3:
-                       kvm_set_cr3(vcpu, kvm_register_read(vcpu, reg));
+                       kvm_set_cr3(vcpu, val);
                        skip_emulated_instruction(vcpu);
                        return 1;
                case 4:
-                       kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg));
+                       kvm_set_cr4(vcpu, val);
                        skip_emulated_instruction(vcpu);
                        return 1;
                case 8: {
@@ -2869,23 +2892,19 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                vcpu->arch.cr0 &= ~X86_CR0_TS;
                vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
                vmx_fpu_activate(vcpu);
-               KVMTRACE_0D(CLTS, vcpu, handler);
                skip_emulated_instruction(vcpu);
                return 1;
        case 1: /*mov from cr*/
                switch (cr) {
                case 3:
                        kvm_register_write(vcpu, reg, vcpu->arch.cr3);
-                       KVMTRACE_3D(CR_READ, vcpu, (u32)cr,
-                                   (u32)kvm_register_read(vcpu, reg),
-                                   (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
-                                   handler);
+                       trace_kvm_cr_read(cr, vcpu->arch.cr3);
                        skip_emulated_instruction(vcpu);
                        return 1;
                case 8:
-                       kvm_register_write(vcpu, reg, kvm_get_cr8(vcpu));
-                       KVMTRACE_2D(CR_READ, vcpu, (u32)cr,
-                                   (u32)kvm_register_read(vcpu, reg), handler);
+                       val = kvm_get_cr8(vcpu);
+                       kvm_register_write(vcpu, reg, val);
+                       trace_kvm_cr_read(cr, val);
                        skip_emulated_instruction(vcpu);
                        return 1;
                }
@@ -2953,7 +2972,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                        val = 0;
                }
                kvm_register_write(vcpu, reg, val);
-               KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
        } else {
                val = vcpu->arch.regs[reg];
                switch (dr) {
@@ -2986,7 +3004,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                        }
                        break;
                }
-               KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
        }
        skip_emulated_instruction(vcpu);
        return 1;
@@ -3008,8 +3025,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                return 1;
        }
 
-       KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32),
-                   handler);
+       trace_kvm_msr_read(ecx, data);
 
        /* FIXME: handling of bits 32:63 of rax, rdx */
        vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
@@ -3024,8 +3040,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
                | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-       KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32),
-                   handler);
+       trace_kvm_msr_write(ecx, data);
 
        if (vmx_set_msr(vcpu, ecx, data) != 0) {
                kvm_inject_gp(vcpu, 0);
@@ -3052,7 +3067,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
        cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 
-       KVMTRACE_0D(PEND_INTR, vcpu, handler);
        ++vcpu->stat.irq_window_exits;
 
        /*
@@ -3204,9 +3218,93 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        }
 
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+       trace_kvm_page_fault(gpa, exit_qualification);
        return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
 }
 
+/*
+ * Build the mask of bits that are reserved in an EPT entry at @level.
+ *
+ * @spte:  the entry being checked; only bit 7 is consulted, and only at
+ *         level 2, to pick between the two level-2 layouts below.
+ * @level: paging level of the entry.
+ *
+ * Returns the reserved-bit mask; the caller ANDs it with the entry to find
+ * reserved bits that are set.
+ */
+static u64 ept_rsvd_mask(u64 spte, int level)
+{
+       int i;
+       u64 mask = 0;
+
+       /*
+        * Bits 51:N are reserved, where N is the physical-address width.
+        * Valid address bits are 0..N-1, so bit N itself must be part of
+        * the mask; hence ">=" rather than ">".
+        */
+       for (i = 51; i >= boot_cpu_data.x86_phys_bits; i--)
+               mask |= (1ULL << i);
+
+       if (level > 2)
+               /* bits 7:3 reserved */
+               mask |= 0xf8;
+       else if (level == 2) {
+               if (spte & (1ULL << 7))
+                       /* 2MB ref, bits 20:12 reserved */
+                       mask |= 0x1ff000;
+               else
+                       /* bits 6:3 reserved */
+                       mask |= 0x78;
+       }
+
+       return mask;
+}
+
+/*
+ * Log one EPT entry and WARN about every misconfiguration pattern it shows.
+ *
+ * @vcpu:  not used by the checks below.
+ * @spte:  the entry to inspect.
+ * @level: paging level the entry was read from.
+ *
+ * Checks, in order: the permission field (bits 2:0), then - only when that
+ * field is non-zero - reserved bits and, for entries that map a page, the
+ * memory-type field (bits 5:3).
+ */
+static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
+                                      int level)
+{
+       const u64 perm = spte & 0x7;
+       u64 rsvd_bits;
+       u64 ept_mem_type;
+       bool maps_page;
+
+       printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level);
+
+       /* 010b (write-only) */
+       WARN_ON(perm == 0x2);
+
+       /* 110b (write/execute) */
+       WARN_ON(perm == 0x6);
+
+       /* 100b (execute-only) and value not supported by logical processor */
+       WARN_ON(perm == 0x4 && !cpu_has_vmx_ept_execute_only());
+
+       /* 000b: nothing further is checked for this entry */
+       if (perm == 0)
+               return;
+
+       rsvd_bits = spte & ept_rsvd_mask(spte, level);
+       if (rsvd_bits != 0) {
+               printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n",
+                                __func__, rsvd_bits);
+               WARN_ON(1);
+       }
+
+       /* Only level-1 entries and level-2 entries with bit 7 set map a page. */
+       maps_page = level == 1 || (level == 2 && (spte & (1ULL << 7)));
+       if (!maps_page)
+               return;
+
+       ept_mem_type = (spte >> 3) & 0x7;
+       switch (ept_mem_type) {
+       case 2:
+       case 3:
+       case 7:
+               printk(KERN_ERR "%s: ept_mem_type=0x%llx\n",
+                               __func__, ept_mem_type);
+               WARN_ON(1);
+               break;
+       default:
+               break;
+       }
+}
+
+/*
+ * Exit handler for EXIT_REASON_EPT_MISCONFIG.
+ *
+ * Logs the faulting guest-physical address, fetches the entries covering it
+ * with kvm_mmu_get_spte_hierarchy() and runs each one through
+ * ept_misconfig_inspect_spte(), starting at PT64_ROOT_LEVEL and moving down
+ * one level per entry (the entry for level L is read from sptes[L - 1]).
+ *
+ * Always returns 0, after marking the run as KVM_EXIT_UNKNOWN with
+ * EXIT_REASON_EPT_MISCONFIG as the hardware exit reason.
+ */
+static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       u64 sptes[4];
+       gpa_t gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+       int level = PT64_ROOT_LEVEL;
+       int remaining;
+
+       printk(KERN_ERR "EPT: Misconfiguration.\n");
+       printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa);
+
+       remaining = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes);
+       while (remaining-- > 0) {
+               ept_misconfig_inspect_spte(vcpu, sptes[level - 1], level);
+               level--;
+       }
+
+       kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+       kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+
+       return 0;
+}
+
 static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        u32 cpu_based_vm_exec_control;
@@ -3286,8 +3384,9 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
        [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
        [EXIT_REASON_WBINVD]                  = handle_wbinvd,
        [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
-       [EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
        [EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
+       [EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
+       [EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -3303,8 +3402,7 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        u32 exit_reason = vmx->exit_reason;
        u32 vectoring_info = vmx->idt_vectoring_info;
 
-       KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
-                   (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
+       trace_kvm_exit(exit_reason, kvm_rip_read(vcpu));
 
        /* If we need to emulate an MMIO from handle_invalid_guest_state
         * we just return 0 */
@@ -3393,10 +3491,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 
        /* We need to handle NMIs before interrupts are enabled */
        if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-           (exit_intr_info & INTR_INFO_VALID_MASK)) {
-               KVMTRACE_0D(NMI, &vmx->vcpu, handler);
+           (exit_intr_info & INTR_INFO_VALID_MASK))
                asm("int $2");
-       }
 
        idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
@@ -3544,11 +3640,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                "mov %%"R"sp, %c[host_rsp](%0) \n\t"
                __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
                "1: \n\t"
+               /* Reload cr2 if changed */
+               "mov %c[cr2](%0), %%"R"ax \n\t"
+               "mov %%cr2, %%"R"dx \n\t"
+               "cmp %%"R"ax, %%"R"dx \n\t"
+               "je 2f \n\t"
+               "mov %%"R"ax, %%cr2 \n\t"
+               "2: \n\t"
                /* Check if vmlaunch of vmresume is needed */
                "cmpl $0, %c[launched](%0) \n\t"
                /* Load guest registers.  Don't clobber flags. */
-               "mov %c[cr2](%0), %%"R"ax \n\t"
-               "mov %%"R"ax, %%cr2 \n\t"
                "mov %c[rax](%0), %%"R"ax \n\t"
                "mov %c[rbx](%0), %%"R"bx \n\t"
                "mov %c[rdx](%0), %%"R"dx \n\t"
@@ -3779,6 +3880,29 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
        return ret;
 }
 
+/*
+ * Symbolic names for VMX exit reasons, published through
+ * kvm_x86_ops.exit_reasons_str. The list ends with the { -1, NULL } entry.
+ *
+ * Exit reasons that have a handler in kvm_vmx_exit_handlers should have a
+ * name here as well; an exit reason without one has no symbolic name to
+ * show.
+ */
+static const struct trace_print_flags vmx_exit_reasons_str[] = {
+       { EXIT_REASON_EXCEPTION_NMI,           "exception" },
+       { EXIT_REASON_EXTERNAL_INTERRUPT,      "ext_irq" },
+       { EXIT_REASON_TRIPLE_FAULT,            "triple_fault" },
+       { EXIT_REASON_NMI_WINDOW,              "nmi_window" },
+       { EXIT_REASON_IO_INSTRUCTION,          "io_instruction" },
+       { EXIT_REASON_CR_ACCESS,               "cr_access" },
+       { EXIT_REASON_DR_ACCESS,               "dr_access" },
+       { EXIT_REASON_CPUID,                   "cpuid" },
+       { EXIT_REASON_MSR_READ,                "rdmsr" },
+       { EXIT_REASON_MSR_WRITE,               "wrmsr" },
+       { EXIT_REASON_PENDING_INTERRUPT,       "interrupt_window" },
+       { EXIT_REASON_HLT,                     "halt" },
+       { EXIT_REASON_INVLPG,                  "invlpg" },
+       { EXIT_REASON_VMCALL,                  "hypercall" },
+       { EXIT_REASON_TPR_BELOW_THRESHOLD,     "tpr_below_thres" },
+       { EXIT_REASON_APIC_ACCESS,             "apic_access" },
+       { EXIT_REASON_WBINVD,                  "wbinvd" },
+       { EXIT_REASON_TASK_SWITCH,             "task_switch" },
+       { EXIT_REASON_MCE_DURING_VMENTRY,      "mce_during_vmentry" },
+       { EXIT_REASON_EPT_VIOLATION,           "ept_violation" },
+       { EXIT_REASON_EPT_MISCONFIG,           "ept_misconfig" },
+       { -1, NULL }
+};
+
 static struct kvm_x86_ops vmx_x86_ops = {
        .cpu_has_kvm_support = cpu_has_kvm_support,
        .disabled_by_bios = vmx_disabled_by_bios,
@@ -3838,6 +3962,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .set_tss_addr = vmx_set_tss_addr,
        .get_tdp_level = get_ept_level,
        .get_mt_mask = vmx_get_mt_mask,
+
+       .exit_reasons_str = vmx_exit_reasons_str,
 };
 
 static int __init vmx_init(void)