From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Mon, 22 Jun 2020 20:20:29 +0000 (-0700)
Subject: KVM: x86/mmu: Move mmu_audit.c and mmutrace.h into the mmu/ sub-directory
X-Git-Tag: v5.9-rc1~121^2~93
X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=33e3042dac6bcc33b80835f7d7b502b1d74c457c;p=linux-2.6-block.git

KVM: x86/mmu: Move mmu_audit.c and mmutrace.h into the mmu/ sub-directory

Move mmu_audit.c and mmutrace.h under mmu/ where they belong.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20200622202034.15093-2-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---

diff --git a/arch/x86/kvm/mmu/mmu_audit.c b/arch/x86/kvm/mmu/mmu_audit.c
new file mode 100644
index 000000000000..9d2844f87f6d
--- /dev/null
+++ b/arch/x86/kvm/mmu/mmu_audit.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * mmu_audit.c:
+ *
+ * Audit code for KVM MMU
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ * Authors:
+ *   Yaniv Kamay <yaniv@qumranet.com>
+ *   Avi Kivity <avi@qumranet.com>
+ *   Marcelo Tosatti <mtosatti@redhat.com>
+ *   Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
+ */
+
+#include <linux/ratelimit.h>
+
+static char const *audit_point_name[] = {
+	"pre page fault",
+	"post page fault",
+	"pre pte write",
+	"post pte write",
+	"pre sync",
+	"post sync"
+};
+
+#define audit_printk(kvm, fmt, args...)		\
+	printk(KERN_ERR "audit: (%s) error: "	\
+		fmt, audit_point_name[kvm->arch.audit_point], ##args)
+
+typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);
+
+static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+			    inspect_spte_fn fn, int level)
+{
+	int i;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+		u64 *ent = sp->spt;
+
+		fn(vcpu, ent + i, level);
+
+		if (is_shadow_present_pte(ent[i]) &&
+		    !is_last_spte(ent[i], level)) {
+			struct kvm_mmu_page *child;
+
+			child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
+			__mmu_spte_walk(vcpu, child, fn, level - 1);
+		}
+	}
+}
+
+static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
+{
+	int i;
+	struct kvm_mmu_page *sp;
+
+	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
+		return;
+
+	if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+		hpa_t root = vcpu->arch.mmu->root_hpa;
+
+		sp = page_header(root);
+		__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu->root_level);
+		return;
+	}
+
+	for (i = 0; i < 4; ++i) {
+		hpa_t root = vcpu->arch.mmu->pae_root[i];
+
+		if (root && VALID_PAGE(root)) {
+			root &= PT64_BASE_ADDR_MASK;
+			sp = page_header(root);
+			__mmu_spte_walk(vcpu, sp, fn, 2);
+		}
+	}
+
+	return;
+}
+
+typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);
+
+static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
+{
+	struct kvm_mmu_page *sp;
+
+	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
+		fn(kvm, sp);
+}
+
+static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
+{
+	struct kvm_mmu_page *sp;
+	gfn_t gfn;
+	kvm_pfn_t pfn;
+	hpa_t hpa;
+
+	sp = page_header(__pa(sptep));
+
+	if (sp->unsync) {
+		if (level != PG_LEVEL_4K) {
+			audit_printk(vcpu->kvm, "unsync sp: %p "
+				     "level = %d\n", sp, level);
+			return;
+		}
+	}
+
+	if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
+		return;
+
+	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
+	pfn = kvm_vcpu_gfn_to_pfn_atomic(vcpu, gfn);
+
+	if (is_error_pfn(pfn))
+		return;
+
+	hpa = pfn << PAGE_SHIFT;
+	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
+		audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
+			     "ent %llx\n", vcpu->arch.mmu->root_level, pfn,
+			     hpa, *sptep);
+}
+
+static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
+{
+	
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); + struct kvm_rmap_head *rmap_head; + struct kvm_mmu_page *rev_sp; + struct kvm_memslots *slots; + struct kvm_memory_slot *slot; + gfn_t gfn; + + rev_sp = page_header(__pa(sptep)); + gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); + + slots = kvm_memslots_for_spte_role(kvm, rev_sp->role); + slot = __gfn_to_memslot(slots, gfn); + if (!slot) { + if (!__ratelimit(&ratelimit_state)) + return; + audit_printk(kvm, "no memslot for gfn %llx\n", gfn); + audit_printk(kvm, "index %ld of sp (gfn=%llx)\n", + (long int)(sptep - rev_sp->spt), rev_sp->gfn); + dump_stack(); + return; + } + + rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot); + if (!rmap_head->val) { + if (!__ratelimit(&ratelimit_state)) + return; + audit_printk(kvm, "no rmap for writable spte %llx\n", + *sptep); + dump_stack(); + } +} + +static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level) +{ + if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level)) + inspect_spte_has_rmap(vcpu->kvm, sptep); +} + +static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level) +{ + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + + if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync) + audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync " + "root.\n", sp); +} + +static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + int i; + + if (sp->role.level != PG_LEVEL_4K) + return; + + for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { + if (!is_shadow_present_pte(sp->spt[i])) + continue; + + inspect_spte_has_rmap(kvm, sp->spt + i); + } +} + +static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + struct kvm_rmap_head *rmap_head; + u64 *sptep; + struct rmap_iterator iter; + struct kvm_memslots *slots; + struct kvm_memory_slot *slot; + + if (sp->role.direct || sp->unsync || sp->role.invalid) + return; + + slots = kvm_memslots_for_spte_role(kvm, sp->role); + slot = __gfn_to_memslot(slots, sp->gfn); + rmap_head = __gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot); + + for_each_rmap_spte(rmap_head, &iter, sptep) { + if (is_writable_pte(*sptep)) + audit_printk(kvm, "shadow page has writable " + "mappings: gfn %llx role %x\n", + sp->gfn, sp->role.word); + } +} + +static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + check_mappings_rmap(kvm, sp); + audit_write_protection(kvm, sp); +} + +static void audit_all_active_sps(struct kvm *kvm) +{ + walk_all_active_sps(kvm, audit_sp); +} + +static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level) +{ + audit_sptes_have_rmaps(vcpu, sptep, level); + audit_mappings(vcpu, sptep, level); + audit_spte_after_sync(vcpu, sptep, level); +} + +static void audit_vcpu_spte(struct kvm_vcpu *vcpu) +{ + mmu_spte_walk(vcpu, audit_spte); +} + +static bool mmu_audit; +static struct static_key mmu_audit_key; + +static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) +{ + static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); + + if (!__ratelimit(&ratelimit_state)) + return; + + vcpu->kvm->arch.audit_point = point; + audit_all_active_sps(vcpu->kvm); + audit_vcpu_spte(vcpu); +} + +static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) +{ + if (static_key_false((&mmu_audit_key))) + __kvm_mmu_audit(vcpu, point); +} + +static void mmu_audit_enable(void) +{ + if (mmu_audit) + return; + + static_key_slow_inc(&mmu_audit_key); + mmu_audit = true; +} + +static void mmu_audit_disable(void) +{ + if (!mmu_audit) + return; + + 
static_key_slow_dec(&mmu_audit_key);
+	mmu_audit = false;
+}
+
+static int mmu_audit_set(const char *val, const struct kernel_param *kp)
+{
+	int ret;
+	unsigned long enable;
+
+	ret = kstrtoul(val, 10, &enable);
+	if (ret < 0)
+		return -EINVAL;
+
+	switch (enable) {
+	case 0:
+		mmu_audit_disable();
+		break;
+	case 1:
+		mmu_audit_enable();
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct kernel_param_ops audit_param_ops = {
+	.set = mmu_audit_set,
+	.get = param_get_bool,
+};
+
+arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
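[ Context for the file above, not part of the diff: mmu_audit.c is never
  built on its own.  When CONFIG_KVM_MMU_AUDIT=y, mmu.c (which sits in
  the same mmu/ directory after this move) textually #includes it and
  provides the audit points that index audit_point_name[].  A rough
  sketch of that arrangement, reconstructed from memory of the v5.8-era
  tree, so treat the exact placement as approximate:

	enum {
		AUDIT_PRE_PAGE_FAULT,
		AUDIT_POST_PAGE_FAULT,
		AUDIT_PRE_PTE_WRITE,
		AUDIT_POST_PTE_WRITE,
		AUDIT_PRE_SYNC,
		AUDIT_POST_SYNC
	};

	#ifdef CONFIG_KVM_MMU_AUDIT
	#include "mmu_audit.c"
	#else
	static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
	static void mmu_audit_disable(void) { }
	#endif

  The static key is what makes the hooks affordable: while auditing is
  disabled, static_key_false(&mmu_audit_key) compiles to straight-line
  code, and writing the mmu_audit parameter patches it into a jump via
  static_key_slow_inc()/static_key_slow_dec(), so __kvm_mmu_audit()
  costs nothing until someone turns it on.  With kvm built as a module,
  toggling it at runtime should look roughly like

	echo 1 > /sys/module/kvm/parameters/mmu_audit

  (path assumed from the parameter name, not taken from this patch). ]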
"unsync" : "sync", 0); \ + saved_ptr; \ + }) + +#define kvm_mmu_trace_pferr_flags \ + { PFERR_PRESENT_MASK, "P" }, \ + { PFERR_WRITE_MASK, "W" }, \ + { PFERR_USER_MASK, "U" }, \ + { PFERR_RSVD_MASK, "RSVD" }, \ + { PFERR_FETCH_MASK, "F" } + +/* + * A pagetable walk has started + */ +TRACE_EVENT( + kvm_mmu_pagetable_walk, + TP_PROTO(u64 addr, u32 pferr), + TP_ARGS(addr, pferr), + + TP_STRUCT__entry( + __field(__u64, addr) + __field(__u32, pferr) + ), + + TP_fast_assign( + __entry->addr = addr; + __entry->pferr = pferr; + ), + + TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr, + __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags)) +); + + +/* We just walked a paging element */ +TRACE_EVENT( + kvm_mmu_paging_element, + TP_PROTO(u64 pte, int level), + TP_ARGS(pte, level), + + TP_STRUCT__entry( + __field(__u64, pte) + __field(__u32, level) + ), + + TP_fast_assign( + __entry->pte = pte; + __entry->level = level; + ), + + TP_printk("pte %llx level %u", __entry->pte, __entry->level) +); + +DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class, + + TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), + + TP_ARGS(table_gfn, index, size), + + TP_STRUCT__entry( + __field(__u64, gpa) + ), + + TP_fast_assign( + __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) + + index * size; + ), + + TP_printk("gpa %llx", __entry->gpa) +); + +/* We set a pte accessed bit */ +DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit, + + TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), + + TP_ARGS(table_gfn, index, size) +); + +/* We set a pte dirty bit */ +DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit, + + TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), + + TP_ARGS(table_gfn, index, size) +); + +TRACE_EVENT( + kvm_mmu_walker_error, + TP_PROTO(u32 pferr), + TP_ARGS(pferr), + + TP_STRUCT__entry( + __field(__u32, pferr) + ), + + TP_fast_assign( + __entry->pferr = pferr; + ), + + TP_printk("pferr %x %s", __entry->pferr, + __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags)) +); + +TRACE_EVENT( + kvm_mmu_get_page, + TP_PROTO(struct kvm_mmu_page *sp, bool created), + TP_ARGS(sp, created), + + TP_STRUCT__entry( + KVM_MMU_PAGE_FIELDS + __field(bool, created) + ), + + TP_fast_assign( + KVM_MMU_PAGE_ASSIGN(sp) + __entry->created = created; + ), + + TP_printk("%s %s", KVM_MMU_PAGE_PRINTK(), + __entry->created ? 
"new" : "existing") +); + +DECLARE_EVENT_CLASS(kvm_mmu_page_class, + + TP_PROTO(struct kvm_mmu_page *sp), + TP_ARGS(sp), + + TP_STRUCT__entry( + KVM_MMU_PAGE_FIELDS + ), + + TP_fast_assign( + KVM_MMU_PAGE_ASSIGN(sp) + ), + + TP_printk("%s", KVM_MMU_PAGE_PRINTK()) +); + +DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page, + TP_PROTO(struct kvm_mmu_page *sp), + + TP_ARGS(sp) +); + +DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, + TP_PROTO(struct kvm_mmu_page *sp), + + TP_ARGS(sp) +); + +DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, + TP_PROTO(struct kvm_mmu_page *sp), + + TP_ARGS(sp) +); + +TRACE_EVENT( + mark_mmio_spte, + TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access, unsigned int gen), + TP_ARGS(sptep, gfn, access, gen), + + TP_STRUCT__entry( + __field(void *, sptep) + __field(gfn_t, gfn) + __field(unsigned, access) + __field(unsigned int, gen) + ), + + TP_fast_assign( + __entry->sptep = sptep; + __entry->gfn = gfn; + __entry->access = access; + __entry->gen = gen; + ), + + TP_printk("sptep:%p gfn %llx access %x gen %x", __entry->sptep, + __entry->gfn, __entry->access, __entry->gen) +); + +TRACE_EVENT( + handle_mmio_page_fault, + TP_PROTO(u64 addr, gfn_t gfn, unsigned access), + TP_ARGS(addr, gfn, access), + + TP_STRUCT__entry( + __field(u64, addr) + __field(gfn_t, gfn) + __field(unsigned, access) + ), + + TP_fast_assign( + __entry->addr = addr; + __entry->gfn = gfn; + __entry->access = access; + ), + + TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, + __entry->access) +); + +#define __spte_satisfied(__spte) \ + (__entry->retry && is_writable_pte(__entry->__spte)) + +TRACE_EVENT( + fast_page_fault, + TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code, + u64 *sptep, u64 old_spte, bool retry), + TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(gpa_t, cr2_or_gpa) + __field(u32, error_code) + __field(u64 *, sptep) + __field(u64, old_spte) + __field(u64, new_spte) + __field(bool, retry) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->cr2_or_gpa = cr2_or_gpa; + __entry->error_code = error_code; + __entry->sptep = sptep; + __entry->old_spte = old_spte; + __entry->new_spte = *sptep; + __entry->retry = retry; + ), + + TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx" + " new %llx spurious %d fixed %d", __entry->vcpu_id, + __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|", + kvm_mmu_trace_pferr_flags), __entry->sptep, + __entry->old_spte, __entry->new_spte, + __spte_satisfied(old_spte), __spte_satisfied(new_spte) + ) +); + +TRACE_EVENT( + kvm_mmu_zap_all_fast, + TP_PROTO(struct kvm *kvm), + TP_ARGS(kvm), + + TP_STRUCT__entry( + __field(__u8, mmu_valid_gen) + __field(unsigned int, mmu_used_pages) + ), + + TP_fast_assign( + __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; + __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; + ), + + TP_printk("kvm-mmu-valid-gen %u used_pages %x", + __entry->mmu_valid_gen, __entry->mmu_used_pages + ) +); + + +TRACE_EVENT( + check_mmio_spte, + TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), + TP_ARGS(spte, kvm_gen, spte_gen), + + TP_STRUCT__entry( + __field(unsigned int, kvm_gen) + __field(unsigned int, spte_gen) + __field(u64, spte) + ), + + TP_fast_assign( + __entry->kvm_gen = kvm_gen; + __entry->spte_gen = spte_gen; + __entry->spte = spte; + ), + + TP_printk("spte %llx kvm_gen %x spte-gen %x valid %d", __entry->spte, + __entry->kvm_gen, __entry->spte_gen, + __entry->kvm_gen 
== __entry->spte_gen
+	)
+);
+
+TRACE_EVENT(
+	kvm_mmu_set_spte,
+	TP_PROTO(int level, gfn_t gfn, u64 *sptep),
+	TP_ARGS(level, gfn, sptep),
+
+	TP_STRUCT__entry(
+		__field(u64, gfn)
+		__field(u64, spte)
+		__field(u64, sptep)
+		__field(u8, level)
+		/* These depend on page entry type, so compute them now. */
+		__field(bool, r)
+		__field(bool, x)
+		__field(signed char, u)
+	),
+
+	TP_fast_assign(
+		__entry->gfn = gfn;
+		__entry->spte = *sptep;
+		__entry->sptep = virt_to_phys(sptep);
+		__entry->level = level;
+		__entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
+		__entry->x = is_executable_pte(__entry->spte);
+		__entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
+	),
+
+	TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
+		  __entry->gfn, __entry->spte,
+		  __entry->r ? "r" : "-",
+		  __entry->spte & PT_WRITABLE_MASK ? "w" : "-",
+		  __entry->x ? "x" : "-",
+		  __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
+		  __entry->level, __entry->sptep
+	)
+);
+
+TRACE_EVENT(
+	kvm_mmu_spte_requested,
+	TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
+	TP_ARGS(addr, level, pfn),
+
+	TP_STRUCT__entry(
+		__field(u64, gfn)
+		__field(u64, pfn)
+		__field(u8, level)
+	),
+
+	TP_fast_assign(
+		__entry->gfn = addr >> PAGE_SHIFT;
+		__entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+		__entry->level = level;
+	),
+
+	TP_printk("gfn %llx pfn %llx level %d",
+		  __entry->gfn, __entry->pfn, __entry->level
+	)
+);
+
+#endif /* _TRACE_KVMMMU_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH mmu
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mmutrace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
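[ A note on the TRACE_EVENT() blocks above, not part of the diff.  Each
  one expands into a tracepoint plus its ring-buffer record: TP_PROTO()
  gives the C prototype of the trace_...() call, TP_ARGS() names the
  arguments, TP_STRUCT__entry() lays out the per-event record,
  TP_fast_assign() fills that record at trace time, and TP_printk()
  formats it when the buffer is read.  A minimal event in the same
  style (kvm_mmu_example is invented purely for illustration):

	TRACE_EVENT(kvm_mmu_example,
		TP_PROTO(u64 gpa, int level),	/* prototype of trace_kvm_mmu_example() */
		TP_ARGS(gpa, level),		/* argument names, reused below */

		TP_STRUCT__entry(		/* layout of the recorded event */
			__field(u64, gpa)
			__field(int, level)
		),

		TP_fast_assign(			/* runs when the tracepoint fires */
			__entry->gpa = gpa;
			__entry->level = level;
		),

		TP_printk("gpa %llx level %d", __entry->gpa, __entry->level)
	);

  The TRACE_INCLUDE_PATH change from "." to "mmu" is what actually makes
  the move work: trace/define_trace.h re-includes this header a second
  time via TRACE_INCLUDE_PATH/TRACE_INCLUDE_FILE, resolved against the
  compiler's include paths (arch/x86/kvm is on kvm's ccflags include
  path, if memory serves), so the path has to follow the file into the
  sub-directory. ]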
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
deleted file mode 100644
index 9d2844f87f6d..000000000000
--- a/arch/x86/kvm/mmu_audit.c
+++ /dev/null
@@ -1,303 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mmu_audit.c:
- *
- * Audit code for KVM MMU
- *
- * Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * Authors:
- *   Yaniv Kamay <yaniv@qumranet.com>
- *   Avi Kivity <avi@qumranet.com>
- *   Marcelo Tosatti <mtosatti@redhat.com>
- *   Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
- */
-
-#include <linux/ratelimit.h>
-
-static char const *audit_point_name[] = {
-	"pre page fault",
-	"post page fault",
-	"pre pte write",
-	"post pte write",
-	"pre sync",
-	"post sync"
-};
-
-#define audit_printk(kvm, fmt, args...)		\
-	printk(KERN_ERR "audit: (%s) error: "	\
-		fmt, audit_point_name[kvm->arch.audit_point], ##args)
-
-typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);
-
-static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-			    inspect_spte_fn fn, int level)
-{
-	int i;
-
-	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-		u64 *ent = sp->spt;
-
-		fn(vcpu, ent + i, level);
-
-		if (is_shadow_present_pte(ent[i]) &&
-		    !is_last_spte(ent[i], level)) {
-			struct kvm_mmu_page *child;
-
-			child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
-			__mmu_spte_walk(vcpu, child, fn, level - 1);
-		}
-	}
-}
-
-static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
-{
-	int i;
-	struct kvm_mmu_page *sp;
-
-	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
-		return;
-
-	if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
-		hpa_t root = vcpu->arch.mmu->root_hpa;
-
-		sp = page_header(root);
-		__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu->root_level);
-		return;
-	}
-
-	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu->pae_root[i];
-
-		if (root && VALID_PAGE(root)) {
-			root &= PT64_BASE_ADDR_MASK;
-			sp = page_header(root);
-			__mmu_spte_walk(vcpu, sp, fn, 2);
-		}
-	}
-
-	return;
-}
-
-typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);
-
-static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
-{
-	struct kvm_mmu_page *sp;
-
-	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
-		fn(kvm, sp);
-}
-
-static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
-{
-	struct kvm_mmu_page *sp;
-	gfn_t gfn;
-	kvm_pfn_t pfn;
-	hpa_t hpa;
-
-	sp = page_header(__pa(sptep));
-
-	if (sp->unsync) {
-		if (level != PG_LEVEL_4K) {
-			audit_printk(vcpu->kvm, "unsync sp: %p "
-				     "level = %d\n", sp, level);
-			return;
-		}
-	}
-
-	if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
-		return;
-
-	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
-	pfn = kvm_vcpu_gfn_to_pfn_atomic(vcpu, gfn);
-
-	if (is_error_pfn(pfn))
-		return;
-
-	hpa = pfn << PAGE_SHIFT;
-	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
-		audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
-			     "ent %llx\n", vcpu->arch.mmu->root_level, pfn,
-			     hpa, *sptep);
-}
-
-static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
-{
-	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
-	struct kvm_rmap_head *rmap_head;
-	struct kvm_mmu_page *rev_sp;
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *slot;
-	gfn_t gfn;
-
-	rev_sp = page_header(__pa(sptep));
-	gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
-
-	slots = kvm_memslots_for_spte_role(kvm, rev_sp->role);
-	slot = __gfn_to_memslot(slots, gfn);
-	if (!slot) {
-		if (!__ratelimit(&ratelimit_state))
-			return;
-		audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
-		audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
-			     (long int)(sptep - rev_sp->spt), rev_sp->gfn);
-		dump_stack();
-		return;
-	}
-
-	rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
-	if (!rmap_head->val) {
-		if (!__ratelimit(&ratelimit_state))
-			return;
-		audit_printk(kvm, "no rmap for writable spte %llx\n",
-			     *sptep);
-		dump_stack();
-	}
-}
-
-static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
-{
-	if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level))
-		inspect_spte_has_rmap(vcpu->kvm, sptep);
-}
-
-static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
-{
-	struct kvm_mmu_page *sp = page_header(__pa(sptep));
-
-	if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
-		audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync "
-			     "root.\n", sp);
-}
-
-static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
-	int i;
-
-	if (sp->role.level != PG_LEVEL_4K)
-		return;
-
-	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-		if (!is_shadow_present_pte(sp->spt[i]))
-			continue;
-
-		inspect_spte_has_rmap(kvm, sp->spt + i);
-	}
-}
-
-static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
-	struct kvm_rmap_head *rmap_head;
-	u64 *sptep;
-	struct rmap_iterator iter;
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *slot;
-
-	if (sp->role.direct || sp->unsync || sp->role.invalid)
-		return;
-
-	slots = kvm_memslots_for_spte_role(kvm, sp->role);
-	slot = __gfn_to_memslot(slots, sp->gfn);
-	rmap_head = __gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot);
-
-	for_each_rmap_spte(rmap_head, &iter, sptep) {
-		if (is_writable_pte(*sptep))
-			audit_printk(kvm, "shadow page has writable "
-				     "mappings: gfn %llx role %x\n",
-				     sp->gfn, sp->role.word);
-	}
-}
-
-static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
-	check_mappings_rmap(kvm, sp);
-	audit_write_protection(kvm, sp);
-}
-
-static void audit_all_active_sps(struct kvm *kvm)
-{
-	walk_all_active_sps(kvm, audit_sp);
-}
-
-static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level)
-{
-	audit_sptes_have_rmaps(vcpu, sptep, level);
-	audit_mappings(vcpu, sptep, level);
-	audit_spte_after_sync(vcpu, sptep, level);
-}
-
-static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
-{
-	mmu_spte_walk(vcpu, audit_spte);
-}
-
-static bool mmu_audit;
-static struct static_key mmu_audit_key;
-
-static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
-{
-	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
-
-	if (!__ratelimit(&ratelimit_state))
-		return;
-
-	vcpu->kvm->arch.audit_point = point;
-	audit_all_active_sps(vcpu->kvm);
-	audit_vcpu_spte(vcpu);
-}
-
-static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
-{
-	if (static_key_false((&mmu_audit_key)))
-		__kvm_mmu_audit(vcpu, point);
-}
-
-static void mmu_audit_enable(void)
-{
-	if (mmu_audit)
-		return;
-
-	static_key_slow_inc(&mmu_audit_key);
-	mmu_audit = true;
-}
-
-static void mmu_audit_disable(void)
-{
-	if (!mmu_audit)
-		return;
-
-	static_key_slow_dec(&mmu_audit_key);
-	mmu_audit = false;
-}
-
-static int mmu_audit_set(const char *val, const struct kernel_param *kp)
-{
-	int ret;
-	unsigned long enable;
-
-	ret = kstrtoul(val, 10, &enable);
-	if (ret < 0)
-		return -EINVAL;
-
-	switch (enable) {
-	case 0:
-		mmu_audit_disable();
-		break;
-	case 1:
-		mmu_audit_enable();
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static const struct kernel_param_ops audit_param_ops = {
-	.set = mmu_audit_set,
-	.get = param_get_bool,
-};
-
-arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
deleted file mode 100644
index ffcd96fc02d0..000000000000
--- a/arch/x86/kvm/mmutrace.h
+++ /dev/null
@@ -1,395 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#if !defined(_TRACE_KVMMMU_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_KVMMMU_H
-
-#include <linux/tracepoint.h>
-#include <trace/events/kvm.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM kvmmmu
-
-#define KVM_MMU_PAGE_FIELDS		\
-	__field(__u8, mmu_valid_gen)	\
-	__field(__u64, gfn)		\
-	__field(__u32, role)		\
-	__field(__u32, root_count)	\
-	__field(bool, unsync)
-
-#define KVM_MMU_PAGE_ASSIGN(sp)				\
-	__entry->mmu_valid_gen = sp->mmu_valid_gen;	\
-	__entry->gfn = sp->gfn;				\
-	__entry->role = 
sp->role.word; \ - __entry->root_count = sp->root_count; \ - __entry->unsync = sp->unsync; - -#define KVM_MMU_PAGE_PRINTK() ({ \ - const char *saved_ptr = trace_seq_buffer_ptr(p); \ - static const char *access_str[] = { \ - "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \ - }; \ - union kvm_mmu_page_role role; \ - \ - role.word = __entry->role; \ - \ - trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" \ - " %snxe %sad root %u %s%c", \ - __entry->mmu_valid_gen, \ - __entry->gfn, role.level, \ - role.gpte_is_8_bytes ? 8 : 4, \ - role.quadrant, \ - role.direct ? " direct" : "", \ - access_str[role.access], \ - role.invalid ? " invalid" : "", \ - role.nxe ? "" : "!", \ - role.ad_disabled ? "!" : "", \ - __entry->root_count, \ - __entry->unsync ? "unsync" : "sync", 0); \ - saved_ptr; \ - }) - -#define kvm_mmu_trace_pferr_flags \ - { PFERR_PRESENT_MASK, "P" }, \ - { PFERR_WRITE_MASK, "W" }, \ - { PFERR_USER_MASK, "U" }, \ - { PFERR_RSVD_MASK, "RSVD" }, \ - { PFERR_FETCH_MASK, "F" } - -/* - * A pagetable walk has started - */ -TRACE_EVENT( - kvm_mmu_pagetable_walk, - TP_PROTO(u64 addr, u32 pferr), - TP_ARGS(addr, pferr), - - TP_STRUCT__entry( - __field(__u64, addr) - __field(__u32, pferr) - ), - - TP_fast_assign( - __entry->addr = addr; - __entry->pferr = pferr; - ), - - TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr, - __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags)) -); - - -/* We just walked a paging element */ -TRACE_EVENT( - kvm_mmu_paging_element, - TP_PROTO(u64 pte, int level), - TP_ARGS(pte, level), - - TP_STRUCT__entry( - __field(__u64, pte) - __field(__u32, level) - ), - - TP_fast_assign( - __entry->pte = pte; - __entry->level = level; - ), - - TP_printk("pte %llx level %u", __entry->pte, __entry->level) -); - -DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class, - - TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), - - TP_ARGS(table_gfn, index, size), - - TP_STRUCT__entry( - __field(__u64, gpa) - ), - - TP_fast_assign( - __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) - + index * size; - ), - - TP_printk("gpa %llx", __entry->gpa) -); - -/* We set a pte accessed bit */ -DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit, - - TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), - - TP_ARGS(table_gfn, index, size) -); - -/* We set a pte dirty bit */ -DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit, - - TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), - - TP_ARGS(table_gfn, index, size) -); - -TRACE_EVENT( - kvm_mmu_walker_error, - TP_PROTO(u32 pferr), - TP_ARGS(pferr), - - TP_STRUCT__entry( - __field(__u32, pferr) - ), - - TP_fast_assign( - __entry->pferr = pferr; - ), - - TP_printk("pferr %x %s", __entry->pferr, - __print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags)) -); - -TRACE_EVENT( - kvm_mmu_get_page, - TP_PROTO(struct kvm_mmu_page *sp, bool created), - TP_ARGS(sp, created), - - TP_STRUCT__entry( - KVM_MMU_PAGE_FIELDS - __field(bool, created) - ), - - TP_fast_assign( - KVM_MMU_PAGE_ASSIGN(sp) - __entry->created = created; - ), - - TP_printk("%s %s", KVM_MMU_PAGE_PRINTK(), - __entry->created ? 
"new" : "existing") -); - -DECLARE_EVENT_CLASS(kvm_mmu_page_class, - - TP_PROTO(struct kvm_mmu_page *sp), - TP_ARGS(sp), - - TP_STRUCT__entry( - KVM_MMU_PAGE_FIELDS - ), - - TP_fast_assign( - KVM_MMU_PAGE_ASSIGN(sp) - ), - - TP_printk("%s", KVM_MMU_PAGE_PRINTK()) -); - -DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page, - TP_PROTO(struct kvm_mmu_page *sp), - - TP_ARGS(sp) -); - -DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, - TP_PROTO(struct kvm_mmu_page *sp), - - TP_ARGS(sp) -); - -DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, - TP_PROTO(struct kvm_mmu_page *sp), - - TP_ARGS(sp) -); - -TRACE_EVENT( - mark_mmio_spte, - TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access, unsigned int gen), - TP_ARGS(sptep, gfn, access, gen), - - TP_STRUCT__entry( - __field(void *, sptep) - __field(gfn_t, gfn) - __field(unsigned, access) - __field(unsigned int, gen) - ), - - TP_fast_assign( - __entry->sptep = sptep; - __entry->gfn = gfn; - __entry->access = access; - __entry->gen = gen; - ), - - TP_printk("sptep:%p gfn %llx access %x gen %x", __entry->sptep, - __entry->gfn, __entry->access, __entry->gen) -); - -TRACE_EVENT( - handle_mmio_page_fault, - TP_PROTO(u64 addr, gfn_t gfn, unsigned access), - TP_ARGS(addr, gfn, access), - - TP_STRUCT__entry( - __field(u64, addr) - __field(gfn_t, gfn) - __field(unsigned, access) - ), - - TP_fast_assign( - __entry->addr = addr; - __entry->gfn = gfn; - __entry->access = access; - ), - - TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, - __entry->access) -); - -#define __spte_satisfied(__spte) \ - (__entry->retry && is_writable_pte(__entry->__spte)) - -TRACE_EVENT( - fast_page_fault, - TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code, - u64 *sptep, u64 old_spte, bool retry), - TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry), - - TP_STRUCT__entry( - __field(int, vcpu_id) - __field(gpa_t, cr2_or_gpa) - __field(u32, error_code) - __field(u64 *, sptep) - __field(u64, old_spte) - __field(u64, new_spte) - __field(bool, retry) - ), - - TP_fast_assign( - __entry->vcpu_id = vcpu->vcpu_id; - __entry->cr2_or_gpa = cr2_or_gpa; - __entry->error_code = error_code; - __entry->sptep = sptep; - __entry->old_spte = old_spte; - __entry->new_spte = *sptep; - __entry->retry = retry; - ), - - TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx" - " new %llx spurious %d fixed %d", __entry->vcpu_id, - __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|", - kvm_mmu_trace_pferr_flags), __entry->sptep, - __entry->old_spte, __entry->new_spte, - __spte_satisfied(old_spte), __spte_satisfied(new_spte) - ) -); - -TRACE_EVENT( - kvm_mmu_zap_all_fast, - TP_PROTO(struct kvm *kvm), - TP_ARGS(kvm), - - TP_STRUCT__entry( - __field(__u8, mmu_valid_gen) - __field(unsigned int, mmu_used_pages) - ), - - TP_fast_assign( - __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; - __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; - ), - - TP_printk("kvm-mmu-valid-gen %u used_pages %x", - __entry->mmu_valid_gen, __entry->mmu_used_pages - ) -); - - -TRACE_EVENT( - check_mmio_spte, - TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), - TP_ARGS(spte, kvm_gen, spte_gen), - - TP_STRUCT__entry( - __field(unsigned int, kvm_gen) - __field(unsigned int, spte_gen) - __field(u64, spte) - ), - - TP_fast_assign( - __entry->kvm_gen = kvm_gen; - __entry->spte_gen = spte_gen; - __entry->spte = spte; - ), - - TP_printk("spte %llx kvm_gen %x spte-gen %x valid %d", __entry->spte, - __entry->kvm_gen, __entry->spte_gen, - __entry->kvm_gen 
== __entry->spte_gen
-	)
-);
-
-TRACE_EVENT(
-	kvm_mmu_set_spte,
-	TP_PROTO(int level, gfn_t gfn, u64 *sptep),
-	TP_ARGS(level, gfn, sptep),
-
-	TP_STRUCT__entry(
-		__field(u64, gfn)
-		__field(u64, spte)
-		__field(u64, sptep)
-		__field(u8, level)
-		/* These depend on page entry type, so compute them now. */
-		__field(bool, r)
-		__field(bool, x)
-		__field(signed char, u)
-	),
-
-	TP_fast_assign(
-		__entry->gfn = gfn;
-		__entry->spte = *sptep;
-		__entry->sptep = virt_to_phys(sptep);
-		__entry->level = level;
-		__entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
-		__entry->x = is_executable_pte(__entry->spte);
-		__entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
-	),
-
-	TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
-		  __entry->gfn, __entry->spte,
-		  __entry->r ? "r" : "-",
-		  __entry->spte & PT_WRITABLE_MASK ? "w" : "-",
-		  __entry->x ? "x" : "-",
-		  __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
-		  __entry->level, __entry->sptep
-	)
-);
-
-TRACE_EVENT(
-	kvm_mmu_spte_requested,
-	TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
-	TP_ARGS(addr, level, pfn),
-
-	TP_STRUCT__entry(
-		__field(u64, gfn)
-		__field(u64, pfn)
-		__field(u8, level)
-	),
-
-	TP_fast_assign(
-		__entry->gfn = addr >> PAGE_SHIFT;
-		__entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
-		__entry->level = level;
-	),
-
-	TP_printk("gfn %llx pfn %llx level %d",
-		  __entry->gfn, __entry->pfn, __entry->level
-	)
-);
-
-#endif /* _TRACE_KVMMMU_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE mmutrace
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
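[ Two usage notes on the code being moved, not part of the diff.  The
  DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10) instances cap the
  audit spew at 10 messages per 5-second window; __ratelimit() returns
  nonzero while printing is still allowed.  The same pattern in
  isolation, as a minimal sketch (example_noisy_check is invented for
  illustration):

	#include <linux/ratelimit.h>

	static void example_noisy_check(void)
	{
		/* allow at most 10 messages every 5 * HZ jiffies */
		static DEFINE_RATELIMIT_STATE(rs, 5 * HZ, 10);

		if (!__ratelimit(&rs))
			return;

		pr_err("example: check failed\n");
	}

  And since TRACE_SYSTEM is kvmmmu, the tracepoints defined in
  mmutrace.h should surface under tracefs as events/kvmmmu/, e.g.

	echo 1 > /sys/kernel/debug/tracing/events/kvmmmu/enable

  (standard tracefs path, assumed rather than taken from this patch),
  independent of the mmu_audit machinery. ]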