kvm: x86: mmu: Recovery of shattered NX large pages
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index bedf6864b0929b05aec8259e223ba2d2a3139c87..529589a42afb09c8439fd186f43732367a5242e1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -37,6 +37,7 @@
 #include <linux/uaccess.h>
 #include <linux/hash.h>
 #include <linux/kern_levels.h>
+#include <linux/kthread.h>
 
 #include <asm/page.h>
 #include <asm/pat.h>
 extern bool itlb_multihit_kvm_mitigation;
 
 static int __read_mostly nx_huge_pages = -1;
+static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
 
 static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
 
 static struct kernel_param_ops nx_huge_pages_ops = {
        .set = set_nx_huge_pages,
        .get = param_get_bool,
 };
 
+static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
+       .set = set_nx_huge_pages_recovery_ratio,
+       .get = param_get_uint,
+};
+
 module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
 __MODULE_PARM_TYPE(nx_huge_pages, "bool");
+module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
+               &nx_huge_pages_recovery_ratio, 0644);
+__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
 
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
@@ -1215,6 +1226,8 @@ static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
                return;
 
        ++kvm->stat.nx_lpage_splits;
+       list_add_tail(&sp->lpage_disallowed_link,
+                     &kvm->arch.lpage_disallowed_mmu_pages);
        sp->lpage_disallowed = true;
 }
 
@@ -1239,6 +1252,7 @@ static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
        --kvm->stat.nx_lpage_splits;
        sp->lpage_disallowed = false;
+       list_del(&sp->lpage_disallowed_link);
 }
 
 static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
@@ -6274,6 +6288,8 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
                        idx = srcu_read_lock(&kvm->srcu);
                        kvm_mmu_zap_all_fast(kvm);
                        srcu_read_unlock(&kvm->srcu, idx);
+
+                       wake_up_process(kvm->arch.nx_lpage_recovery_thread);
                }
                mutex_unlock(&kvm_lock);
        }
@@ -6367,3 +6383,132 @@ void kvm_mmu_module_exit(void)
        unregister_shrinker(&mmu_shrinker);
        mmu_audit_disable();
 }
+
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
+{
+       unsigned int old_val;
+       int err;
+
+       old_val = nx_huge_pages_recovery_ratio;
+       err = param_set_uint(val, kp);
+       if (err)
+               return err;
+
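+       /*
+        * Kick the recovery workers if recovery was just enabled, i.e. the
+        * ratio went from zero to non-zero while the mitigation is active.
+        */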
+       if (READ_ONCE(nx_huge_pages) &&
+           !old_val && nx_huge_pages_recovery_ratio) {
+               struct kvm *kvm;
+
+               mutex_lock(&kvm_lock);
+
+               list_for_each_entry(kvm, &vm_list, vm_list)
+                       wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+
+               mutex_unlock(&kvm_lock);
+       }
+
+       return err;
+}
+
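+/*
+ * Reclaim a portion of the shadow pages that are disallowing huge mappings
+ * due to the NX huge page workaround; zapping them lets the affected ranges
+ * be mapped huge again if the guest is no longer executing from them.
+ */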
+static void kvm_recover_nx_lpages(struct kvm *kvm)
+{
+       int rcu_idx;
+       struct kvm_mmu_page *sp;
+       unsigned int ratio;
+       LIST_HEAD(invalid_list);
+       ulong to_zap;
+
+       rcu_idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+
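+       /* Zap a 1/ratio share of the split pages; a ratio of 0 disables recovery. */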
+       ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+       to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
+       while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
+               /*
+                * We use a separate list instead of just using active_mmu_pages
+                * because the number of lpage_disallowed pages is expected to
+                * be relatively small compared to the total.
+                */
+               sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
+                                     struct kvm_mmu_page,
+                                     lpage_disallowed_link);
+               WARN_ON_ONCE(!sp->lpage_disallowed);
+               kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+               WARN_ON_ONCE(sp->lpage_disallowed);
+
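+               /* Commit batched zaps; yield mmu_lock on resched or contention. */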
+               if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+                       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+                       if (to_zap)
+                               cond_resched_lock(&kvm->mmu_lock);
+               }
+       }
+
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, rcu_idx);
+}
+
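+/*
+ * Time remaining until the next recovery pass, nominally 60 seconds after
+ * start_time; sleep indefinitely if the mitigation or recovery is disabled.
+ */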
+static long get_nx_lpage_recovery_timeout(u64 start_time)
+{
+       return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
+               ? start_time + 60 * HZ - get_jiffies_64()
+               : MAX_SCHEDULE_TIMEOUT;
+}
+
+static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
+{
+       u64 start_time;
+       long remaining_time;
+
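+       /* Do one recovery pass roughly every 60 seconds while enabled. */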
+       while (true) {
+               start_time = get_jiffies_64();
+               remaining_time = get_nx_lpage_recovery_timeout(start_time);
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               while (!kthread_should_stop() && remaining_time > 0) {
+                       schedule_timeout(remaining_time);
+                       remaining_time = get_nx_lpage_recovery_timeout(start_time);
+                       set_current_state(TASK_INTERRUPTIBLE);
+               }
+
+               set_current_state(TASK_RUNNING);
+
+               if (kthread_should_stop())
+                       return 0;
+
+               kvm_recover_nx_lpages(kvm);
+       }
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+       int err;
+
+       err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
+                                         "kvm-nx-lpage-recovery",
+                                         &kvm->arch.nx_lpage_recovery_thread);
+       if (!err)
+               kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
+
+       return err;
+}
+
+void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
+{
+       if (kvm->arch.nx_lpage_recovery_thread)
+               kthread_stop(kvm->arch.nx_lpage_recovery_thread);
+}