KVM: x86: Introduce KVM_TDX_GET_CPUID
author Xiaoyao Li <xiaoyao.li@intel.com>
Wed, 30 Oct 2024 19:00:37 +0000 (12:00 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Fri, 14 Mar 2025 18:20:51 +0000 (14:20 -0400)
Implement an IOCTL to allow userspace to read the CPUID bit values for a
configured TD.

The TDX module doesn't provide the ability to set all CPUID bits. Instead
some are configured indirectly, or have fixed values. But it does allow
for the final resulting CPUID bits to be read. This information will be
useful for userspace to understand the configuration of the TD, and set
KVM's copy via KVM_SET_CPUID2.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Co-developed-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Signed-off-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
 - Fix subleaf mask check (Binbin)
 - Search all possible sub-leafs (Francesco Lavra)
 - Reduce off-by-one error sensitive code (Francesco, Xiaoyao)
 - Handle buffers too small from userspace (Xiaoyao)
 - Read max CPUID from TD instead of using fixed values (Xiaoyao)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/uapi/asm/kvm.h
arch/x86/kvm/vmx/tdx.c
arch/x86/kvm/vmx/tdx_arch.h
arch/x86/kvm/vmx/tdx_errno.h

index 9316afbd4a88fc9fd64e2b900c8cd836cc5a4f8a..cd55484e3f0c24cd7f4df19e687604b0c87d6468 100644 (file)
@@ -932,6 +932,7 @@ enum kvm_tdx_cmd_id {
        KVM_TDX_CAPABILITIES = 0,
        KVM_TDX_INIT_VM,
        KVM_TDX_INIT_VCPU,
+       KVM_TDX_GET_CPUID,
 
        KVM_TDX_CMD_NR_MAX,
 };
index 4ba46ac3b9af732a836f246e1f6e159122a9a6fa..2ac925ecccd56890f7db3f9838c448f18daf8b15 100644 (file)
@@ -3,6 +3,7 @@
 #include <asm/cpufeature.h>
 #include <asm/tdx.h>
 #include "capabilities.h"
+#include "mmu.h"
 #include "x86_ops.h"
 #include "lapic.h"
 #include "tdx.h"
@@ -849,6 +850,103 @@ free_hkid:
        return ret;
 }
 
+/* Thin wrapper: read one TD metadata field, returning tdh_mng_rd()'s status. */
+static u64 tdx_td_metadata_field_read(struct kvm_tdx *tdx, u64 field_id,
+                                     u64 *data)
+{
+       u64 status = tdh_mng_rd(&tdx->td, field_id, data);
+
+       /* Callers in this file treat any non-zero status as a failure. */
+       return status;
+}
+
+/* Leaf bits 30:7 and sub-leaf bits 31:7 cannot be encoded in a field ID. */
+#define TDX_MD_UNREADABLE_LEAF_MASK    GENMASK(30, 7)
+#define TDX_MD_UNREADABLE_SUBLEAF_MASK GENMASK(31, 7)
+
+/*
+ * Read one CPUID leaf/sub-leaf of the TD into @out. Returns 0 and advances
+ * *@entry_index on success, -EINVAL (@out untouched) if the table is full or
+ * the leaf/sub-leaf is not encodable, -EIO (registers zeroed) on read failure.
+ */
+static int tdx_read_cpuid(struct kvm_vcpu *vcpu, u32 leaf, u32 sub_leaf,
+                         bool sub_leaf_set, int *entry_index,
+                         struct kvm_cpuid_entry2 *out)
+{
+       struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+       u64 field_id = TD_MD_FIELD_ID_CPUID_VALUES;
+       u64 ebx_eax, edx_ecx;
+       u64 err;
+
+       if (*entry_index >= KVM_MAX_CPUID_ENTRIES)
+               return -EINVAL;
+
+       if (leaf & TDX_MD_UNREADABLE_LEAF_MASK ||
+           sub_leaf & TDX_MD_UNREADABLE_SUBLEAF_MASK)
+               return -EINVAL;
+
+       /*
+        * bit 23:17, RESERVED: reserved, must be 0;
+        * bit 16,    LEAF_31: leaf number bit 31;
+        * bit 15:9,  LEAF_6_0: leaf number bits 6:0, leaf bits 30:7 are
+        *                      implicitly 0;
+        * bit 8,     SUBLEAF_NA: sub-leaf not applicable flag;
+        * bit 7:1,   SUBLEAF_6_0: sub-leaf number bits 6:0. If SUBLEAF_NA is 1,
+        *                         the SUBLEAF_6_0 is all-1.
+        *                         sub-leaf bits 31:7 are implicitly 0;
+        * bit 0,     ELEMENT_I: Element index within field;
+        */
+       field_id |= ((leaf & 0x80000000) ? 1 : 0) << 16;
+       field_id |= (leaf & 0x7f) << 9;
+       if (sub_leaf_set)
+               field_id |= (sub_leaf & 0x7f) << 1;
+       else
+               field_id |= 0x1fe;
+
+       err = tdx_td_metadata_field_read(kvm_tdx, field_id, &ebx_eax);
+       if (err) /* TODO: check for specific errors */
+               goto err_out;
+
+       out->eax = (u32) ebx_eax;
+       out->ebx = (u32) (ebx_eax >> 32);
+
+       field_id++;
+       err = tdx_td_metadata_field_read(kvm_tdx, field_id, &edx_ecx);
+       /* Odd if this fails: ebx_eax of the same leaf was just read fine. */
+       if (WARN_ON_ONCE(err))
+               goto err_out;
+
+       out->ecx = (u32) edx_ecx;
+       out->edx = (u32) (edx_ecx >> 32);
+
+       out->function = leaf;
+       out->index = sub_leaf;
+       out->flags |= sub_leaf_set ? KVM_CPUID_FLAG_SIGNIFCANT_INDEX : 0;
+
+       /*
+        * Work around missing support on old TDX modules, fetch
+        * guest maxpa from gfn_direct_bits.
+        */
+       if (leaf == 0x80000008) {
+               gpa_t gpa_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm));
+               unsigned int g_maxpa = __ffs(gpa_bits) + 1;
+
+               out->eax = tdx_set_guest_phys_addr_bits(out->eax, g_maxpa);
+       }
+
+       (*entry_index)++;
+
+       return 0;
+
+err_out:
+       out->eax = 0;
+       out->ebx = 0;
+       out->ecx = 0;
+       out->edx = 0;
+
+       return -EIO;
+}
+
 static int tdx_td_init(struct kvm *kvm, struct kvm_tdx_cmd *cmd)
 {
        struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
@@ -1043,6 +1141,96 @@ free_tdvpr:
        return ret;
 }
 
+/* Sometimes reads multiple subleafs. Returns 0 on success, non-zero on failure. */
+static int tdx_vcpu_get_cpuid_leaf(struct kvm_vcpu *vcpu, u32 leaf, int *entry_index,
+                                  struct kvm_cpuid_entry2 *output_e)
+{
+       u32 sub_leaf;
+       int ret;
+
+       /* First try without a subleaf */
+       ret = tdx_read_cpuid(vcpu, leaf, 0, false, entry_index, output_e);
+
+       /* If success, or invalid leaf, just give up */
+       if (ret != -EIO)
+               return ret;
+
+       /*
+        * The read without a subleaf failed, so read consecutive subleafs,
+        * starting at 0, until one fails. tdx_read_cpuid() rejects subleaf
+        * indices wider than 7 bits and a full entry table, which bounds
+        * the loop.
+        */
+       for (sub_leaf = 0; ; sub_leaf++, output_e++) {
+               if (tdx_read_cpuid(vcpu, leaf, sub_leaf, true, entry_index,
+                                  output_e))
+                       break;
+       }
+
+       /* Failure (1) only if not even subleaf 0 could be read. */
+       return !sub_leaf;
+}
+
+/*
+ * KVM_TDX_GET_CPUID: copy the TD's readable CPUID entries to the kvm_cpuid2 at
+ * cmd->data; if nent is too small, update only nent and return -E2BIG.
+ */
+static int tdx_vcpu_get_cpuid(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
+{
+       struct kvm_cpuid2 __user *output = u64_to_user_ptr(cmd->data);
+       struct kvm_cpuid2 *td_cpuid;
+       int r = 0, i = 0;
+       u32 level, leaf;
+
+       td_cpuid = kzalloc(sizeof(*td_cpuid) +
+                       sizeof(td_cpuid->entries[0]) * KVM_MAX_CPUID_ENTRIES,
+                       GFP_KERNEL);
+       if (!td_cpuid)
+               return -ENOMEM;
+
+       if (copy_from_user(td_cpuid, output, sizeof(*output))) {
+               r = -EFAULT;
+               goto out;
+       }
+
+       /* Read max CPUID for normal range */
+       if (tdx_vcpu_get_cpuid_leaf(vcpu, 0, &i, &td_cpuid->entries[i])) {
+               r = -EIO;
+               goto out;
+       }
+       level = td_cpuid->entries[0].eax;
+
+       for (leaf = 1; leaf <= level; leaf++)
+               tdx_vcpu_get_cpuid_leaf(vcpu, leaf, &i, &td_cpuid->entries[i]);
+
+       /* Read max CPUID for extended range */
+       if (tdx_vcpu_get_cpuid_leaf(vcpu, 0x80000000, &i, &td_cpuid->entries[i])) {
+               r = -EIO;
+               goto out;
+       }
+       level = td_cpuid->entries[i - 1].eax;
+
+       for (leaf = 0x80000001; leaf <= level; leaf++)
+               tdx_vcpu_get_cpuid_leaf(vcpu, leaf, &i, &td_cpuid->entries[i]);
+
+       /* Always report the entry count; copy entries only if they all fit. */
+       if (td_cpuid->nent < i)
+               r = -E2BIG;
+       td_cpuid->nent = i;
+
+       if (copy_to_user(output, td_cpuid, sizeof(*output)))
+               r = -EFAULT;
+       else if (r != -E2BIG &&
+                copy_to_user(output->entries, td_cpuid->entries,
+                             td_cpuid->nent * sizeof(td_cpuid->entries[0])))
+               r = -EFAULT;
+
+out:
+       kfree(td_cpuid);
+
+       return r;
+}
+
 static int tdx_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
 {
        u64 apic_base;
@@ -1092,6 +1280,9 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
        case KVM_TDX_INIT_VCPU:
                ret = tdx_vcpu_init(vcpu, &cmd);
                break;
+       case KVM_TDX_GET_CPUID:
+               ret = tdx_vcpu_get_cpuid(vcpu, &cmd);
+               break;
        default:
                ret = -EINVAL;
                break;
index 0e09188a878558f6d70d3199b5a1bd1246b2d84b..55a740f90e67264aff4250bb523353ea67c1a5a9 100644 (file)
@@ -123,4 +123,9 @@ struct td_params {
 
 #define MD_FIELD_ID_FEATURES0_TOPOLOGY_ENUM    BIT_ULL(20)
 
+/*
+ * TD scope metadata field ID.
+ */
+#define TD_MD_FIELD_ID_CPUID_VALUES            0x9410000300000000ULL
+
 #endif /* __KVM_X86_TDX_ARCH_H */
index dc3fa2a58c2cc0e8a614428b97a76ccf00cf3087..f9dbb3a065ccd25c6fd8fcf3b29771ace95e2559 100644 (file)
@@ -23,6 +23,7 @@
 #define TDX_FLUSHVP_NOT_DONE                   0x8000082400000000ULL
 #define TDX_EPT_WALK_FAILED                    0xC0000B0000000000ULL
 #define TDX_EPT_ENTRY_STATE_INCORRECT          0xC0000B0D00000000ULL
+#define TDX_METADATA_FIELD_NOT_READABLE                0xC0000C0200000000ULL
 
 /*
  * TDX module operand ID, appears in 31:0 part of error code as