KVM: x86: Add a new page-track hook to handle memslot deletion
author Yan Zhao <yan.y.zhao@intel.com>
Sat, 29 Jul 2023 01:35:24 +0000 (18:35 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Thu, 31 Aug 2023 18:07:25 +0000 (14:07 -0400)
Add a new page-track hook, track_remove_region(), that is called when a
memslot DELETE operation is about to be committed.  The "remove" hook
will be used by KVMGT and will effectively replace the existing
track_flush_slot() altogether now that KVM itself doesn't rely on the
"flush" hook either.

The "flush" hook is flawed as it's invoked before the memslot operation
is guaranteed to succeed, i.e. KVM might ultimately keep the existing
memslot without notifying external page track users, a.k.a. KVMGT.  In
practice, this can't currently happen on x86, but there are no guarantees
that won't change in the future, not to mention that "flush" does a very
poor job of describing what is happening.

Pass in the gfn+nr_pages instead of the slot itself so external users,
i.e. KVMGT, don't need to be exposed to KVM internals (memslots).  This will
help set the stage for additional cleanups to the page-track APIs.

Opportunistically align the existing srcu_read_lock_held() usage so that
the new case doesn't stand out like a sore thumb (and not aligning the
new code makes bots unhappy).

Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Tested-by: Yongwei Ma <yongwei.ma@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20230729013535.1070024-19-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_page_track.h
arch/x86/kvm/mmu/page_track.c
arch/x86/kvm/x86.c

index f744682648e753d0f45434360db685f760f7a1d3..cfd36c22b4673c9bde90a6cff17d46354154862d 100644 (file)
@@ -43,6 +43,17 @@ struct kvm_page_track_notifier_node {
         */
        void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
                            struct kvm_page_track_notifier_node *node);
+
+       /*
+        * Invoked when a memory region is removed from the guest.  Or in KVM
+        * terms, when a memslot is deleted.
+        *
+        * @gfn:       base gfn of the region being removed
+        * @nr_pages:  number of pages in the to-be-removed region
+        * @node:      this node
+        */
+       void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages,
+                                   struct kvm_page_track_notifier_node *node);
 };
 
 int kvm_page_track_init(struct kvm *kvm);
@@ -75,6 +86,7 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
 void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
                          int bytes);
 void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
+void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
 
 bool kvm_page_track_has_external_user(struct kvm *kvm);
 
index cfd0b8092d06d3d5931452fd30da8f358cee4bce..640e383b5252968ec4b9ac42f1f50916d380d9a2 100644 (file)
@@ -270,7 +270,7 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
 
        idx = srcu_read_lock(&head->track_srcu);
        hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
-                               srcu_read_lock_held(&head->track_srcu))
+                                 srcu_read_lock_held(&head->track_srcu))
                if (n->track_write)
                        n->track_write(gpa, new, bytes, n);
        srcu_read_unlock(&head->track_srcu, idx);
@@ -298,12 +298,35 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 
        idx = srcu_read_lock(&head->track_srcu);
        hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
-                               srcu_read_lock_held(&head->track_srcu))
+                                 srcu_read_lock_held(&head->track_srcu))
                if (n->track_flush_slot)
                        n->track_flush_slot(kvm, slot, n);
        srcu_read_unlock(&head->track_srcu, idx);
 }
 
+/*
+ * Notify external page track nodes that a memory region is being removed from
+ * the VM, e.g. so that users can free any associated metadata.
+ *
+ * @kvm:  the VM whose page-track notifier list (kvm->arch.track_notifier_head)
+ *        is walked
+ * @slot: the memslot being deleted; only its base_gfn and npages are handed
+ *        to the notifiers, the slot pointer itself is not passed on
+ *
+ * Returns immediately if no notifier is registered.  Otherwise the notifier
+ * list is walked inside an SRCU read-side critical section on track_srcu and
+ * track_remove_region() is invoked on every node that implements the hook;
+ * nodes that leave the hook NULL are skipped.
+ */
+void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+       struct kvm_page_track_notifier_head *head;
+       struct kvm_page_track_notifier_node *n;
+       int idx;
+
+       head = &kvm->arch.track_notifier_head;
+
+       if (hlist_empty(&head->track_notifier_list))
+               return;
+
+       idx = srcu_read_lock(&head->track_srcu);
+       hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
+                                 srcu_read_lock_held(&head->track_srcu))
+               if (n->track_remove_region)
+                       n->track_remove_region(slot->base_gfn, slot->npages, n);
+       srcu_read_unlock(&head->track_srcu, idx);
+}
+
 bool kvm_page_track_has_external_user(struct kvm *kvm)
 {
        return !hlist_empty(&kvm->arch.track_notifier_head.track_notifier_list);
index 161e97e6caa96b52aee5331a4c440dc78f722fff..7928464475930a8b615939210de139a538cd3187 100644 (file)
@@ -12793,6 +12793,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
 {
+       if (change == KVM_MR_DELETE)
+               kvm_page_track_delete_slot(kvm, old);
+
        if (!kvm->arch.n_requested_mmu_pages &&
            (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
                unsigned long nr_mmu_pages;