KVM: PPC: Book3S HV: Add a VM capability to enable nested virtualization
author Paul Mackerras <paulus@ozlabs.org>
Fri, 21 Sep 2018 10:02:01 +0000 (20:02 +1000)
committer Paul Mackerras <paulus@ozlabs.org>
Tue, 9 Oct 2018 05:14:47 +0000 (16:14 +1100)
With this, userspace can enable a KVM-HV guest to run nested guests
under it.

The administrator can control whether any nested guests can be run;
setting the "nested" module parameter to false prevents any guests
becoming nested hypervisors (that is, any attempt to enable the nested
capability on a guest will fail).  Guests which are already nested
hypervisors will continue to be so.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Documentation/virtual/kvm/api.txt
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/powerpc.c
include/uapi/linux/kvm.h

index 2f5f9b743bff1727585d2c13126940541fd47455..fde48b6708f187f4d39cedc030054a9f80b5a19a 100644 (file)
@@ -4532,6 +4532,20 @@ With this capability, a guest may read the MSR_PLATFORM_INFO MSR. Otherwise,
 a #GP would be raised when the guest tries to access. Currently, this
 capability does not enable write permissions of this MSR for the guest.
 
+7.16 KVM_CAP_PPC_NESTED_HV
+
+Architectures: ppc
+Parameters: none
+Returns: 0 on success, -EINVAL if the implementation lacks nested-HV support,
+        -EPERM if the "nested" parameter is off, -ENODEV on unsupported CPUs.
+
+HV-KVM on POWER9 and later systems allows for "nested-HV"
+virtualization, which provides a way for a guest VM to run guests that
+can run using the CPU's supervisor mode (privileged non-hypervisor
+state).  Enabling this capability on a VM depends on the CPU having
+the necessary functionality and on the facility being enabled with a
+kvm-hv module parameter.
+
 8. Other capabilities.
 ----------------------
 
index 88362ccda5498110a04de2a16cbe8fdfe4cd5efe..9b89b1918dfcc689ffe50631895a787ab639fb19 100644 (file)
@@ -325,6 +325,7 @@ struct kvmppc_ops {
        int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
                            unsigned long flags);
        void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
+       int (*enable_nested)(struct kvm *kvm);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
index f3cdf51d0191a417e170c4521203a146d9c69b45..89bcf923d5420e130d326b89d9e4aacfe7dcc68d 100644 (file)
@@ -122,6 +122,16 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
 MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
 #endif
 
+/* If set, guests are allowed to create and control nested guests */
+static bool nested = true;
+module_param(nested, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
+
+static inline bool nesting_enabled(struct kvm *kvm)
+{
+       return kvm->arch.nested_enable && kvm_is_radix(kvm);
+}
+
 /* If set, the threads on each CPU core have to be in the same MMU mode */
 static bool no_mixing_hpt_and_radix;
 
@@ -963,12 +973,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 
        case H_SET_PARTITION_TABLE:
                ret = H_FUNCTION;
-               if (vcpu->kvm->arch.nested_enable)
+               if (nesting_enabled(vcpu->kvm))
                        ret = kvmhv_set_partition_table(vcpu);
                break;
        case H_ENTER_NESTED:
                ret = H_FUNCTION;
-               if (!vcpu->kvm->arch.nested_enable)
+               if (!nesting_enabled(vcpu->kvm))
                        break;
                ret = kvmhv_enter_nested_guest(vcpu);
                if (ret == H_INTERRUPT) {
@@ -978,9 +988,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
                break;
        case H_TLB_INVALIDATE:
                ret = H_FUNCTION;
-               if (!vcpu->kvm->arch.nested_enable)
-                       break;
-               ret = kvmhv_do_nested_tlbie(vcpu);
+               if (nesting_enabled(vcpu->kvm))
+                       ret = kvmhv_do_nested_tlbie(vcpu);
                break;
 
        default:
@@ -4508,10 +4517,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 /* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
 int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
 {
-       if (kvm->arch.nested_enable) {
-               kvm->arch.nested_enable = false;
+       if (nesting_enabled(kvm))
                kvmhv_release_all_nested(kvm);
-       }
        kvmppc_free_radix(kvm);
        kvmppc_update_lpcr(kvm, LPCR_VPM1,
                           LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
@@ -4788,7 +4795,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 
        /* Perform global invalidation and return lpid to the pool */
        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
-               if (kvm->arch.nested_enable)
+               if (nesting_enabled(kvm))
                        kvmhv_release_all_nested(kvm);
                kvm->arch.process_table = 0;
                kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
@@ -5181,6 +5188,19 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
        return err;
 }
 
+static int kvmhv_enable_nested(struct kvm *kvm)
+{
+       if (!nested)
+               return -EPERM;
+       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+               return -ENODEV;
+
+       /* kvm == NULL means the caller is testing if the capability exists */
+       if (kvm)
+               kvm->arch.nested_enable = true;
+       return 0;
+}
+
 static struct kvmppc_ops kvm_ops_hv = {
        .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
        .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -5220,6 +5240,7 @@ static struct kvmppc_ops kvm_ops_hv = {
        .configure_mmu = kvmhv_configure_mmu,
        .get_rmmu_info = kvmhv_get_rmmu_info,
        .set_smt_mode = kvmhv_set_smt_mode,
+       .enable_nested = kvmhv_enable_nested,
 };
 
 static int kvm_init_subcore_bitmap(void)
index 1f4b128894a01990051340234356028905a1d374..2869a299c4edc66445b8e841bb16b3e018f75cd0 100644 (file)
@@ -597,6 +597,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) &&
                       cpu_has_feature(CPU_FTR_HVMODE));
                break;
+       case KVM_CAP_PPC_NESTED_HV:
+               r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
+                      !kvmppc_hv_ops->enable_nested(NULL));
+               break;
 #endif
        case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -2115,6 +2119,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                        r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
                break;
        }
+
+       case KVM_CAP_PPC_NESTED_HV:
+               r = -EINVAL;
+               if (!is_kvmppc_hv_enabled(kvm) ||
+                   !kvm->arch.kvm_ops->enable_nested)
+                       break;
+               r = kvm->arch.kvm_ops->enable_nested(kvm);
+               break;
 #endif
        default:
                r = -EINVAL;
index 251be353f950b35082eff384a97028f64ff593e5..d9cec6b5cb3740dbc27d5d6fefffe160f60f5695 100644 (file)
@@ -953,6 +953,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_NESTED_STATE 157
 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158
 #define KVM_CAP_MSR_PLATFORM_INFO 159
+#define KVM_CAP_PPC_NESTED_HV 160
 
 #ifdef KVM_CAP_IRQ_ROUTING