KVM: TDX: Do TDX specific vcpu initialization
authorIsaku Yamahata <isaku.yamahata@intel.com>
Wed, 30 Oct 2024 19:00:36 +0000 (12:00 -0700)
committerPaolo Bonzini <pbonzini@redhat.com>
Fri, 14 Mar 2025 18:20:51 +0000 (14:20 -0400)
TD guest vcpu needs TDX specific initialization before running.  Repurpose
KVM_MEMORY_ENCRYPT_OP to vcpu-scope, add a new sub-command
KVM_TDX_INIT_VCPU, and implement the callback for it.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Signed-off-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Co-developed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
 - Fix comment: https://lore.kernel.org/kvm/Z36OYfRW9oPjW8be@google.com/
   (Sean)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm-x86-ops.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/uapi/asm/kvm.h
arch/x86/kvm/lapic.c
arch/x86/kvm/vmx/main.c
arch/x86/kvm/vmx/tdx.c
arch/x86/kvm/vmx/tdx.h
arch/x86/kvm/vmx/x86_ops.h
arch/x86/kvm/x86.c

index e6cb89ced1fd792e0c5d6cb057ea0036b396555d..aae4193f80e3c04b3b3d44f8072d85ecdf73039d 100644 (file)
@@ -127,6 +127,7 @@ KVM_X86_OP(enable_smi_window)
 #endif
 KVM_X86_OP_OPTIONAL(dev_get_attr)
 KVM_X86_OP(mem_enc_ioctl)
+KVM_X86_OP_OPTIONAL(vcpu_mem_enc_ioctl)
 KVM_X86_OP_OPTIONAL(mem_enc_register_region)
 KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
 KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
index 405d3892427923e082687756048e82098439136e..6800d3956ab12c342bf94baf6c2d8413eafbd72a 100644 (file)
@@ -1849,6 +1849,7 @@ struct kvm_x86_ops {
 
        int (*dev_get_attr)(u32 group, u64 attr, u64 *val);
        int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
+       int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp);
        int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
        int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
        int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
index b64351076f2a8ff66758125ac46fd28dc5686a00..9316afbd4a88fc9fd64e2b900c8cd836cc5a4f8a 100644 (file)
@@ -931,6 +931,7 @@ struct kvm_hyperv_eventfd {
 enum kvm_tdx_cmd_id {
        KVM_TDX_CAPABILITIES = 0,
        KVM_TDX_INIT_VM,
+       KVM_TDX_INIT_VCPU,
 
        KVM_TDX_CMD_NR_MAX,
 };
index a009c94c26c2a09c39457b02047a4e9e414cb6f0..a1cbca31ec3096040cbf75367517d16102c0e981 100644 (file)
@@ -2657,6 +2657,7 @@ int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated)
        kvm_recalculate_apic_map(vcpu->kvm);
        return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_apic_set_base);
 
 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
 {
index a840a84815898f5eefd3fa60fe1020c83198ccde..e7d402b3a90d4e2d28244727ebecf1ead7195384 100644 (file)
@@ -106,6 +106,14 @@ static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
        return tdx_vm_ioctl(kvm, argp);
 }
 
+static int vt_vcpu_mem_enc_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
+{
+       if (!is_td_vcpu(vcpu))
+               return -EINVAL;
+
+       return tdx_vcpu_ioctl(vcpu, argp);
+}
+
 #define VMX_REQUIRED_APICV_INHIBITS                            \
        (BIT(APICV_INHIBIT_REASON_DISABLED) |                   \
         BIT(APICV_INHIBIT_REASON_ABSENT) |                     \
@@ -262,6 +270,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
        .get_untagged_addr = vmx_get_untagged_addr,
 
        .mem_enc_ioctl = vt_mem_enc_ioctl,
+       .vcpu_mem_enc_ioctl = vt_vcpu_mem_enc_ioctl,
 };
 
 struct kvm_x86_init_ops vt_init_ops __initdata = {
index e7bcea016f6401b350679ad8c3aba6f4ad508722..4ba46ac3b9af732a836f246e1f6e159122a9a6fa 100644 (file)
@@ -410,6 +410,7 @@ int tdx_vm_init(struct kvm *kvm)
 int tdx_vcpu_create(struct kvm_vcpu *vcpu)
 {
        struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+       struct vcpu_tdx *tdx = to_tdx(vcpu);
 
        if (kvm_tdx->state != TD_STATE_INITIALIZED)
                return -EIO;
@@ -438,12 +439,42 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
        if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE)
                vcpu->arch.xfd_no_write_intercept = true;
 
+       tdx->state = VCPU_TD_STATE_UNINITIALIZED;
+
        return 0;
 }
 
 void tdx_vcpu_free(struct kvm_vcpu *vcpu)
 {
-       /* This is stub for now.  More logic will come. */
+       struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+       struct vcpu_tdx *tdx = to_tdx(vcpu);
+       int i;
+
+       /*
+        * It is not possible to reclaim pages while hkid is assigned. It might
+        * be assigned if:
+        * 1. the TD VM is being destroyed but freeing hkid failed, in which
+        * case the pages are leaked
+        * 2. TD VCPU creation failed and this is on the error path, in which case
+        * there is nothing to do anyway
+        */
+       if (is_hkid_assigned(kvm_tdx))
+               return;
+
+       if (tdx->vp.tdcx_pages) {
+               for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) {
+                       if (tdx->vp.tdcx_pages[i])
+                               tdx_reclaim_control_page(tdx->vp.tdcx_pages[i]);
+               }
+               kfree(tdx->vp.tdcx_pages);
+               tdx->vp.tdcx_pages = NULL;
+       }
+       if (tdx->vp.tdvpr_page) {
+               tdx_reclaim_control_page(tdx->vp.tdvpr_page);
+               tdx->vp.tdvpr_page = 0;
+       }
+
+       tdx->state = VCPU_TD_STATE_UNINITIALIZED;
 }
 
 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
@@ -653,6 +684,8 @@ static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params,
                goto free_hkid;
 
        kvm_tdx->td.tdcs_nr_pages = tdx_sysinfo->td_ctrl.tdcs_base_size / PAGE_SIZE;
+       /* TDVPS = TDVPR(4K page) + TDCX(multiple 4K pages), -1 for TDVPR. */
+       kvm_tdx->td.tdcx_nr_pages = tdx_sysinfo->td_ctrl.tdvps_base_size / PAGE_SIZE - 1;
        tdcs_pages = kcalloc(kvm_tdx->td.tdcs_nr_pages, sizeof(*kvm_tdx->td.tdcs_pages),
                             GFP_KERNEL | __GFP_ZERO);
        if (!tdcs_pages)
@@ -930,6 +963,143 @@ out:
        return r;
 }
 
+/* The VMM can pass one 64-bit auxiliary value to the vcpu via RCX for the guest BIOS. */
+static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx)
+{
+       struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+       struct vcpu_tdx *tdx = to_tdx(vcpu);
+       struct page *page;
+       int ret, i;
+       u64 err;
+
+       page = alloc_page(GFP_KERNEL);
+       if (!page)
+               return -ENOMEM;
+       tdx->vp.tdvpr_page = page;
+
+       tdx->vp.tdcx_pages = kcalloc(kvm_tdx->td.tdcx_nr_pages, sizeof(*tdx->vp.tdcx_pages),
+                                    GFP_KERNEL);
+       if (!tdx->vp.tdcx_pages) {
+               ret = -ENOMEM;
+               goto free_tdvpr;
+       }
+
+       for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) {
+               page = alloc_page(GFP_KERNEL);
+               if (!page) {
+                       ret = -ENOMEM;
+                       goto free_tdcx;
+               }
+               tdx->vp.tdcx_pages[i] = page;
+       }
+
+       err = tdh_vp_create(&kvm_tdx->td, &tdx->vp);
+       if (KVM_BUG_ON(err, vcpu->kvm)) {
+               ret = -EIO;
+               pr_tdx_error(TDH_VP_CREATE, err);
+               goto free_tdcx;
+       }
+
+       for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) {
+               err = tdh_vp_addcx(&tdx->vp, tdx->vp.tdcx_pages[i]);
+               if (KVM_BUG_ON(err, vcpu->kvm)) {
+                       pr_tdx_error(TDH_VP_ADDCX, err);
+                       /*
+                        * Pages already added are reclaimed by the vcpu_free
+                        * method, but the rest are freed here.
+                        */
+                       for (; i < kvm_tdx->td.tdcx_nr_pages; i++) {
+                               __free_page(tdx->vp.tdcx_pages[i]);
+                               tdx->vp.tdcx_pages[i] = NULL;
+                       }
+                       return -EIO;
+               }
+       }
+
+       err = tdh_vp_init(&tdx->vp, vcpu_rcx, vcpu->vcpu_id);
+       if (KVM_BUG_ON(err, vcpu->kvm)) {
+               pr_tdx_error(TDH_VP_INIT, err);
+               return -EIO;
+       }
+
+       vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+       return 0;
+
+free_tdcx:
+       for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) {
+               if (tdx->vp.tdcx_pages[i])
+                       __free_page(tdx->vp.tdcx_pages[i]);
+               tdx->vp.tdcx_pages[i] = NULL;
+       }
+       kfree(tdx->vp.tdcx_pages);
+       tdx->vp.tdcx_pages = NULL;
+
+free_tdvpr:
+       if (tdx->vp.tdvpr_page)
+               __free_page(tdx->vp.tdvpr_page);
+       tdx->vp.tdvpr_page = 0;
+
+       return ret;
+}
+
+static int tdx_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
+{
+       u64 apic_base;
+       struct vcpu_tdx *tdx = to_tdx(vcpu);
+       int ret;
+
+       if (cmd->flags)
+               return -EINVAL;
+
+       if (tdx->state != VCPU_TD_STATE_UNINITIALIZED)
+               return -EINVAL;
+
+       /*
+        * TDX requires X2APIC, userspace is responsible for configuring guest
+        * CPUID accordingly.
+        */
+       apic_base = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC |
+               (kvm_vcpu_is_reset_bsp(vcpu) ? MSR_IA32_APICBASE_BSP : 0);
+       if (kvm_apic_set_base(vcpu, apic_base, true))
+               return -EINVAL;
+
+       ret = tdx_td_vcpu_init(vcpu, (u64)cmd->data);
+       if (ret)
+               return ret;
+
+       tdx->state = VCPU_TD_STATE_INITIALIZED;
+
+       return 0;
+}
+
+int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
+{
+       struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+       struct kvm_tdx_cmd cmd;
+       int ret;
+
+       if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE)
+               return -EINVAL;
+
+       if (copy_from_user(&cmd, argp, sizeof(cmd)))
+               return -EFAULT;
+
+       if (cmd.hw_error)
+               return -EINVAL;
+
+       switch (cmd.id) {
+       case KVM_TDX_INIT_VCPU:
+               ret = tdx_vcpu_init(vcpu, &cmd);
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
 static int tdx_online_cpu(unsigned int cpu)
 {
        unsigned long flags;
index 6e23f78c2713698679198ac7e734450f84c350a1..6ec7ac1d91e3c591c6ca6a737fe67bf3e4671956 100644 (file)
@@ -33,9 +33,18 @@ struct kvm_tdx {
        struct tdx_td td;
 };
 
+/* TDX module vCPU states */
+enum vcpu_tdx_state {
+       VCPU_TD_STATE_UNINITIALIZED = 0,
+       VCPU_TD_STATE_INITIALIZED,
+};
+
 struct vcpu_tdx {
        struct kvm_vcpu vcpu;
-       /* TDX specific members follow. */
+
+       struct tdx_vp vp;
+
+       enum vcpu_tdx_state state;
 };
 
 static inline bool is_td(struct kvm *kvm)
index c57a69a9c7d06ebdde5032e0a091ee012d1e5924..89bb7785bd0985fb7ba394ccbdd8e32bc65e86bc 100644 (file)
@@ -129,6 +129,8 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
 
 int tdx_vcpu_create(struct kvm_vcpu *vcpu);
 void tdx_vcpu_free(struct kvm_vcpu *vcpu);
+
+int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
 #else
 static inline int tdx_vm_init(struct kvm *kvm) { return -EOPNOTSUPP; }
 static inline void tdx_mmu_release_hkid(struct kvm *kvm) {}
@@ -137,6 +139,8 @@ static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOP
 
 static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
 static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
+
+static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
 #endif
 
 #endif /* __KVM_X86_VMX_X86_OPS_H */
index a4d1f3319a5baf90a82d030dd2ec5de093080b0a..9f92170226e5dacc7abbacbb51008aa12f9f2a3b 100644 (file)
@@ -6287,6 +6287,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        case KVM_SET_DEVICE_ATTR:
                r = kvm_vcpu_ioctl_device_attr(vcpu, ioctl, argp);
                break;
+       case KVM_MEMORY_ENCRYPT_OP:
+               r = -ENOTTY;
+               if (!kvm_x86_ops.vcpu_mem_enc_ioctl)
+                       goto out;
+               r = kvm_x86_ops.vcpu_mem_enc_ioctl(vcpu, argp);
+               break;
        default:
                r = -EINVAL;
        }
@@ -12676,6 +12682,7 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
 {
        return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
 
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 {