Merge tag 'cgroup-for-6.0-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel...

[linux-2.6-block.git] / virt / kvm / kvm_main.c
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index 515dfe9d3bcfb09b091f6af83dae7d76210f60dd..584a5bab3af395e392b4f4d83f37c75c25859bdb 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -702,30 +702,31 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
  
         /*
          * .change_pte() must be surrounded by .invalidate_range_{start,end}().
-        * If mmu_notifier_count is zero, then no in-progress invalidations,
-        * including this one, found a relevant memslot at start(); rechecking
-        * memslots here is unnecessary.  Note, a false positive (count elevated
-        * by a different invalidation) is sub-optimal but functionally ok.
+        * If mmu_invalidate_in_progress is zero, then no in-progress
+        * invalidations, including this one, found a relevant memslot at
+        * start(); rechecking memslots here is unnecessary.  Note, a false
+        * positive (count elevated by a different invalidation) is sub-optimal
+        * but functionally ok.
          */
         WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
-       if (!READ_ONCE(kvm->mmu_notifier_count))
+       if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
                 return;
  
         kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
  }
  
-void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
-                                  unsigned long end)
+void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
+                             unsigned long end)
  {
         /*
          * The count increase must become visible at unlock time as no
          * spte can be established without taking the mmu_lock and
          * count is also read inside the mmu_lock critical section.
          */
-       kvm->mmu_notifier_count++;
-       if (likely(kvm->mmu_notifier_count == 1)) {
-               kvm->mmu_notifier_range_start = start;
-               kvm->mmu_notifier_range_end = end;
+       kvm->mmu_invalidate_in_progress++;
+       if (likely(kvm->mmu_invalidate_in_progress == 1)) {
+               kvm->mmu_invalidate_range_start = start;
+               kvm->mmu_invalidate_range_end = end;
         } else {
                 /*
                  * Fully tracking multiple concurrent ranges has diminishing
@@ -736,10 +737,10 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
                  * accumulate and persist until all outstanding invalidates
                  * complete.
                  */
-               kvm->mmu_notifier_range_start =
-                       min(kvm->mmu_notifier_range_start, start);
-               kvm->mmu_notifier_range_end =
-                       max(kvm->mmu_notifier_range_end, end);
+               kvm->mmu_invalidate_range_start =
+                       min(kvm->mmu_invalidate_range_start, start);
+               kvm->mmu_invalidate_range_end =
+                       max(kvm->mmu_invalidate_range_end, end);
         }
  }
  
@@ -752,7 +753,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
                 .end            = range->end,
                 .pte            = __pte(0),
                 .handler        = kvm_unmap_gfn_range,
-               .on_lock        = kvm_inc_notifier_count,
+               .on_lock        = kvm_mmu_invalidate_begin,
                 .on_unlock      = kvm_arch_guest_memory_reclaimed,
                 .flush_on_ret   = true,
                 .may_block      = mmu_notifier_range_blockable(range),
@@ -763,7 +764,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
         /*
          * Prevent memslot modification between range_start() and range_end()
          * so that conditionally locking provides the same result in both
-        * functions.  Without that guarantee, the mmu_notifier_count
+        * functions.  Without that guarantee, the mmu_invalidate_in_progress
          * adjustments will be imbalanced.
          *
          * Pairs with the decrement in range_end().
@@ -779,7 +780,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
          * any given time, and the caches themselves can check for hva overlap,
          * i.e. don't need to rely on memslot overlap checks for performance.
          * Because this runs without holding mmu_lock, the pfn caches must use
-        * mn_active_invalidate_count (see above) instead of mmu_notifier_count.
+        * mn_active_invalidate_count (see above) instead of
+        * mmu_invalidate_in_progress.
          */
         gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
                                           hva_range.may_block);
@@ -789,22 +791,22 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
         return 0;
  }
  
-void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
-                                  unsigned long end)
+void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
+                           unsigned long end)
  {
         /*
          * This sequence increase will notify the kvm page fault that
          * the page that is going to be mapped in the spte could have
          * been freed.
          */
-       kvm->mmu_notifier_seq++;
+       kvm->mmu_invalidate_seq++;
         smp_wmb();
         /*
          * The above sequence increase must be visible before the
          * below count decrease, which is ensured by the smp_wmb above
-        * in conjunction with the smp_rmb in mmu_notifier_retry().
+        * in conjunction with the smp_rmb in mmu_invalidate_retry().
          */
-       kvm->mmu_notifier_count--;
+       kvm->mmu_invalidate_in_progress--;
  }
  
  static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
@@ -816,7 +818,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
                 .end            = range->end,
                 .pte            = __pte(0),
                 .handler        = (void *)kvm_null_fn,
-               .on_lock        = kvm_dec_notifier_count,
+               .on_lock        = kvm_mmu_invalidate_end,
                 .on_unlock      = (void *)kvm_null_fn,
                 .flush_on_ret   = false,
                 .may_block      = mmu_notifier_range_blockable(range),
@@ -837,7 +839,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
         if (wake)
                 rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
  
-       BUG_ON(kvm->mmu_notifier_count < 0);
+       BUG_ON(kvm->mmu_invalidate_in_progress < 0);
  }
  
  static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
@@ -1134,6 +1136,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
         if (!kvm)
                 return ERR_PTR(-ENOMEM);
  
+       /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */
+       __module_get(kvm_chardev_ops.owner);
+
         KVM_MMU_LOCK_INIT(kvm);
         mmgrab(current->mm);
         kvm->mm = current->mm;
@@ -1211,9 +1216,17 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
         if (r)
                 goto out_err_no_mmu_notifier;
  
+       r = kvm_coalesced_mmio_init(kvm);
+       if (r < 0)
+               goto out_no_coalesced_mmio;
+
+       r = kvm_create_vm_debugfs(kvm, fdname);
+       if (r)
+               goto out_err_no_debugfs;
+
         r = kvm_arch_post_init_vm(kvm);
         if (r)
-               goto out_err_mmu_notifier;
+               goto out_err;
  
         mutex_lock(&kvm_lock);
         list_add(&kvm->vm_list, &vm_list);
@@ -1222,25 +1235,13 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
         preempt_notifier_inc();
         kvm_init_pm_notifier(kvm);
  
-       /*
-        * When the fd passed to this ioctl() is opened it pins the module,
-        * but try_module_get() also prevents getting a reference if the module
-        * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait").
-        */
-       if (!try_module_get(kvm_chardev_ops.owner)) {
-               r = -ENODEV;
-               goto out_err_mmu_notifier;
-       }
-
-       r = kvm_create_vm_debugfs(kvm, fdname);
-       if (r)
-               goto out_err;
-
         return kvm;
  
  out_err:
-       module_put(kvm_chardev_ops.owner);
-out_err_mmu_notifier:
+       kvm_destroy_vm_debugfs(kvm);
+out_err_no_debugfs:
+       kvm_coalesced_mmio_free(kvm);
+out_no_coalesced_mmio:
  #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
         if (kvm->mmu_notifier.ops)
                 mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
@@ -1259,6 +1260,7 @@ out_err_no_irq_srcu:
  out_err_no_srcu:
         kvm_arch_free_vm(kvm);
         mmdrop(current->mm);
+       module_put(kvm_chardev_ops.owner);
         return ERR_PTR(r);
  }
  
@@ -2516,7 +2518,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
  {
         unsigned int flags = FOLL_HWPOISON;
         struct page *page;
-       int npages = 0;
+       int npages;
  
         might_sleep();
  
@@ -4378,7 +4380,7 @@ void kvm_unregister_device_ops(u32 type)
  static int kvm_ioctl_create_device(struct kvm *kvm,
                                    struct kvm_create_device *cd)
  {
-       const struct kvm_device_ops *ops = NULL;
+       const struct kvm_device_ops *ops;
         struct kvm_device *dev;
         bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
         int type;
@@ -4913,11 +4915,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
                 goto put_fd;
         }
  
-#ifdef CONFIG_KVM_MMIO
-       r = kvm_coalesced_mmio_init(kvm);
-       if (r < 0)
-               goto put_kvm;
-#endif
         file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
         if (IS_ERR(file)) {
                 r = PTR_ERR(file);