KVM: TDX: Add a placeholder to handle TDX VM exit
author    Isaku Yamahata <isaku.yamahata@intel.com>
          Thu, 6 Mar 2025 18:27:04 +0000 (13:27 -0500)
committer Paolo Bonzini <pbonzini@redhat.com>
          Fri, 14 Mar 2025 18:20:54 +0000 (14:20 -0400)
Introduce the wiring for handling TDX VM exits by implementing the
callbacks .get_exit_info(), .get_entry_info(), and .handle_exit().
Additionally, add error handling during the TDX VM exit flow, and add a
placeholder to handle various exit reasons.

Store the VMX exit reason and exit qualification in struct vcpu_vt for
TDX, so that TDX and VMX can use the same helpers to retrieve them.
Store the extended exit qualification and exit GPA in struct vcpu_tdx
because they are used only by TDX code.
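
As an illustration of the split, a minimal sketch of a shared accessor
(not the exact kernel helpers, which also handle register-availability
caching; to_vt() and union vmx_exit_reason are taken as given):

    /*
     * Both VMX and TDX vCPUs embed struct vcpu_vt, so one helper can
     * serve both; the field is filled from VMX hardware state or by
     * tdx_vcpu_enter_exit().
     */
    static inline union vmx_exit_reason vmx_get_exit_reason(struct kvm_vcpu *vcpu)
    {
            return to_vt(vcpu)->exit_reason;
    }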

Contention Handling: The TDH.VP.ENTER operation may contend with TDH.MEM.*
operations due to the secure EPT or the TD EPOCH. If contention occurs,
the return value has TDX_OPERAND_BUSY set, and the vCPU attempts to
re-enter the guest with EXIT_FASTPATH_EXIT_HANDLED rather than
EXIT_FASTPATH_REENTER_GUEST, so that interrupts that became pending while
IN_GUEST_MODE are guaranteed to be delivered; otherwise, a requester of
KVM_REQ_OUTSIDE_GUEST_MODE could be blocked endlessly.
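
The fastpath check below relies on a tdx_operand_busy() helper; a minimal
sketch of it, assuming TDX_SEAMCALL_STATUS_MASK isolates the status-code
bits of the SEAMCALL return value:

    /* True iff the SEAMCALL status code is TDX_OPERAND_BUSY. */
    static inline bool tdx_operand_busy(u64 err)
    {
            return (err & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_BUSY;
    }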

Error Handling (a sketch of the status-bit layout follows the list):
- TDX_SW_ERROR: This covers #UD raised by the SEAMCALL instruction when
  the CPU isn't in VMX operation, #GP raised by SEAMCALL when TDX isn't
  enabled by the BIOS, and TDX_SEAMCALL_VMFAILINVALID when the SEAM
  firmware is not loaded or is disabled.
- TDX_ERROR: This indicates that a check failed in the TDX module,
  preventing the vCPU from running.
- Failed VM Entry: Exit to userspace with KVM_EXIT_FAIL_ENTRY. Handle it
  separately, before the TDX_NON_RECOVERABLE check, because the TDX module
  also sets TDX_NON_RECOVERABLE when off-TD debug is not enabled.
- TDX_NON_RECOVERABLE: Set by the TDX module when the error is
  non-recoverable, indicating that the TDX guest is dead or the vCPU is
  disabled. A special case is triple fault, which also sets
  TDX_NON_RECOVERABLE but exits to userspace with KVM_EXIT_SHUTDOWN,
  aligning with the VMX case.
- Any unhandled VM exit reason also exits to userspace with
  KVM_EXIT_INTERNAL_ERROR.
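
As a sanity-check sketch of that layout (hypothetical asserts, not part
of the patch, using the masks and codes defined in the diff below):

    /*
     * Bit 63 is TDX_ERROR, bit 62 is TDX_NON_RECOVERABLE; TDX_SW_ERROR
     * additionally sets bits 47:40, which is why SW errors are matched
     * first with (ret & TDX_SW_ERROR) == TDX_SW_ERROR.
     */
    static_assert(TDX_NON_RECOVERABLE_TD & TDX_NON_RECOVERABLE);  /* bit 62 set   */
    static_assert(!(TDX_NON_RECOVERABLE_TD & TDX_ERROR));         /* bit 63 clear */
    static_assert((TDX_SEAMCALL_VMFAILINVALID & TDX_SW_ERROR) == TDX_SW_ERROR);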

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Binbin Wu <binbin.wu@linux.intel.com>
Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Message-ID: <20250222014225.897298-4-binbin.wu@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/tdx.h
arch/x86/kvm/vmx/main.c
arch/x86/kvm/vmx/tdx.c
arch/x86/kvm/vmx/tdx.h
arch/x86/kvm/vmx/tdx_errno.h
arch/x86/kvm/vmx/x86_ops.h

index bb4a3bc2621955e7bec6b25ece0b860f9d5d2922..6d74f4d8c573140bf38f4ec7d6eb12c799d1fc5d 100644
@@ -19,6 +19,7 @@
  * TDX module.
  */
 #define TDX_ERROR                      _BITUL(63)
+#define TDX_NON_RECOVERABLE            _BITUL(62)
 #define TDX_SW_ERROR                   (TDX_ERROR | GENMASK_ULL(47, 40))
 #define TDX_SEAMCALL_VMFAILINVALID     (TDX_SW_ERROR | _UL(0xFFFF0000))
 
index c0497ed0c9be00989082c3f5b1f63ef1d156869c..a567865baf9b6c4b61521e2ed5bef26675ae4895 100644
@@ -181,6 +181,15 @@ static fastpath_t vt_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
        return vmx_vcpu_run(vcpu, force_immediate_exit);
 }
 
+static int vt_handle_exit(struct kvm_vcpu *vcpu,
+                         enum exit_fastpath_completion fastpath)
+{
+       if (is_td_vcpu(vcpu))
+               return tdx_handle_exit(vcpu, fastpath);
+
+       return vmx_handle_exit(vcpu, fastpath);
+}
+
 static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
 {
        if (is_td_vcpu(vcpu)) {
@@ -228,6 +237,29 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
        vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
 }
 
+static void vt_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code)
+{
+       *intr_info = 0;
+       *error_code = 0;
+
+       if (is_td_vcpu(vcpu))
+               return;
+
+       vmx_get_entry_info(vcpu, intr_info, error_code);
+}
+
+static void vt_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
+                       u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code)
+{
+       if (is_td_vcpu(vcpu)) {
+               tdx_get_exit_info(vcpu, reason, info1, info2, intr_info,
+                                 error_code);
+               return;
+       }
+
+       vmx_get_exit_info(vcpu, reason, info1, info2, intr_info, error_code);
+}
+
 static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
 {
        if (!is_td(kvm))
@@ -323,7 +355,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 
        .vcpu_pre_run = vt_vcpu_pre_run,
        .vcpu_run = vt_vcpu_run,
-       .handle_exit = vmx_handle_exit,
+       .handle_exit = vt_handle_exit,
        .skip_emulated_instruction = vmx_skip_emulated_instruction,
        .update_emulated_instruction = vmx_update_emulated_instruction,
        .set_interrupt_shadow = vmx_set_interrupt_shadow,
@@ -357,8 +389,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
        .set_identity_map_addr = vmx_set_identity_map_addr,
        .get_mt_mask = vmx_get_mt_mask,
 
-       .get_exit_info = vmx_get_exit_info,
-       .get_entry_info = vmx_get_entry_info,
+       .get_exit_info = vt_get_exit_info,
+       .get_entry_info = vt_get_entry_info,
 
        .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
 
index a6388eb95988b3cc313136bb02bf6b9c9bdeb2d2..ec105c1fbb7839e2bbd139398666a257190262ce 100644
@@ -783,17 +783,70 @@ int tdx_vcpu_pre_run(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static __always_inline u32 tdx_to_vmx_exit_reason(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+       switch (tdx->vp_enter_ret & TDX_SEAMCALL_STATUS_MASK) {
+       case TDX_SUCCESS:
+       case TDX_NON_RECOVERABLE_VCPU:
+       case TDX_NON_RECOVERABLE_TD:
+       case TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE:
+       case TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE:
+               break;
+       default:
+               return -1u;
+       }
+
+       return tdx->vp_enter_ret;
+}
+
 static noinstr void tdx_vcpu_enter_exit(struct kvm_vcpu *vcpu)
 {
        struct vcpu_tdx *tdx = to_tdx(vcpu);
+       struct vcpu_vt *vt = to_vt(vcpu);
 
        guest_state_enter_irqoff();
 
        tdx->vp_enter_ret = tdh_vp_enter(&tdx->vp, &tdx->vp_enter_args);
 
+       vt->exit_reason.full = tdx_to_vmx_exit_reason(vcpu);
+
+       vt->exit_qualification = tdx->vp_enter_args.rcx;
+       tdx->ext_exit_qualification = tdx->vp_enter_args.rdx;
+       tdx->exit_gpa = tdx->vp_enter_args.r8;
+       vt->exit_intr_info = tdx->vp_enter_args.r9;
+
        guest_state_exit_irqoff();
 }
 
+static bool tdx_failed_vmentry(struct kvm_vcpu *vcpu)
+{
+       return vmx_get_exit_reason(vcpu).failed_vmentry &&
+              vmx_get_exit_reason(vcpu).full != -1u;
+}
+
+static fastpath_t tdx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+{
+       u64 vp_enter_ret = to_tdx(vcpu)->vp_enter_ret;
+
+       /*
+        * TDX_OPERAND_BUSY could be returned for SEPT due to 0-step mitigation
+        * or for TD EPOCH due to contention with TDH.MEM.TRACK on TDH.VP.ENTER.
+        *
+        * When KVM requests KVM_REQ_OUTSIDE_GUEST_MODE, which has both
+        * KVM_REQUEST_WAIT and KVM_REQUEST_NO_ACTION set, it requires target
+        * vCPUs leaving fastpath so that interrupt can be enabled to ensure the
+        * IPIs can be delivered. Return EXIT_FASTPATH_EXIT_HANDLED instead of
+        * EXIT_FASTPATH_REENTER_GUEST to exit fastpath, otherwise, the
+        * requester may be blocked endlessly.
+        */
+       if (unlikely(tdx_operand_busy(vp_enter_ret)))
+               return EXIT_FASTPATH_EXIT_HANDLED;
+
+       return EXIT_FASTPATH_NONE;
+}
+
 #define TDX_REGS_AVAIL_SET     (BIT_ULL(VCPU_EXREG_EXIT_INFO_1) | \
                                 BIT_ULL(VCPU_EXREG_EXIT_INFO_2) | \
                                 BIT_ULL(VCPU_REGS_RAX) | \
@@ -866,9 +919,18 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 
        vcpu->arch.regs_avail &= TDX_REGS_AVAIL_SET;
 
+       if (unlikely((tdx->vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR))
+               return EXIT_FASTPATH_NONE;
+
+       if (unlikely(vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
+               kvm_machine_check();
+
        trace_kvm_exit(vcpu, KVM_ISA_VMX);
 
-       return EXIT_FASTPATH_NONE;
+       if (unlikely(tdx_failed_vmentry(vcpu)))
+               return EXIT_FASTPATH_NONE;
+
+       return tdx_exit_handlers_fastpath(vcpu);
 }
 
 void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
@@ -1184,6 +1246,83 @@ int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
        return tdx_sept_drop_private_spte(kvm, gfn, level, page);
 }
 
+int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
+{
+       struct vcpu_tdx *tdx = to_tdx(vcpu);
+       u64 vp_enter_ret = tdx->vp_enter_ret;
+       union vmx_exit_reason exit_reason = vmx_get_exit_reason(vcpu);
+
+       if (fastpath != EXIT_FASTPATH_NONE)
+               return 1;
+
+       /*
+        * Handle TDX SW errors, including TDX_SEAMCALL_UD, TDX_SEAMCALL_GP and
+        * TDX_SEAMCALL_VMFAILINVALID.
+        */
+       if (unlikely((vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR)) {
+               KVM_BUG_ON(!kvm_rebooting, vcpu->kvm);
+               goto unhandled_exit;
+       }
+
+       if (unlikely(tdx_failed_vmentry(vcpu))) {
+               /*
+                * If the guest state is protected, off-TD debug is not
+                * enabled, so TDX_NON_RECOVERABLE must be set.
+                */
+               WARN_ON_ONCE(vcpu->arch.guest_state_protected &&
+                               !(vp_enter_ret & TDX_NON_RECOVERABLE));
+               vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+               vcpu->run->fail_entry.hardware_entry_failure_reason = exit_reason.full;
+               vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
+               return 0;
+       }
+
+       if (unlikely(vp_enter_ret & (TDX_ERROR | TDX_NON_RECOVERABLE)) &&
+               exit_reason.basic != EXIT_REASON_TRIPLE_FAULT) {
+               kvm_pr_unimpl("TD vp_enter_ret 0x%llx\n", vp_enter_ret);
+               goto unhandled_exit;
+       }
+
+       WARN_ON_ONCE(exit_reason.basic != EXIT_REASON_TRIPLE_FAULT &&
+                    (vp_enter_ret & TDX_SEAMCALL_STATUS_MASK) != TDX_SUCCESS);
+
+       switch (exit_reason.basic) {
+       case EXIT_REASON_TRIPLE_FAULT:
+               vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+               vcpu->mmio_needed = 0;
+               return 0;
+       default:
+               break;
+       }
+
+unhandled_exit:
+       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+       vcpu->run->internal.ndata = 2;
+       vcpu->run->internal.data[0] = vp_enter_ret;
+       vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+       return 0;
+}
+
+void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
+               u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code)
+{
+       struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+       *reason = tdx->vt.exit_reason.full;
+       if (*reason != -1u) {
+               *info1 = vmx_get_exit_qual(vcpu);
+               *info2 = tdx->ext_exit_qualification;
+               *intr_info = vmx_get_intr_info(vcpu);
+       } else {
+               *info1 = 0;
+               *info2 = 0;
+               *intr_info = 0;
+       }
+
+       *error_code = 0;
+}
+
 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
 {
        const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf;
index 0a54d286e38060a0ea9c4e93e45b9daea191de20..a9c11a740ff553889eb75fd025d404da6d3a9074 100644
@@ -48,6 +48,8 @@ enum vcpu_tdx_state {
 struct vcpu_tdx {
        struct kvm_vcpu vcpu;
        struct vcpu_vt vt;
+       u64 ext_exit_qualification;
+       gpa_t exit_gpa;
        struct tdx_module_args vp_enter_args;
 
        struct tdx_vp vp;
index f9dbb3a065ccd25c6fd8fcf3b29771ace95e2559..6ff4672c41810c0990082a8d46db34420a29272b 100644
@@ -10,6 +10,9 @@
  * TDX SEAMCALL Status Codes (returned in RAX)
  */
 #define TDX_NON_RECOVERABLE_VCPU               0x4000000100000000ULL
+#define TDX_NON_RECOVERABLE_TD                 0x4000000200000000ULL
+#define TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE  0x6000000500000000ULL
+#define TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE 0x6000000700000000ULL
 #define TDX_INTERRUPTED_RESUMABLE              0x8000000300000000ULL
 #define TDX_OPERAND_INVALID                    0xC000010000000000ULL
 #define TDX_OPERAND_BUSY                       0x8000020000000000ULL
index cd18e9b1e1243ed105c49d5f46585aae288d67c0..2960d95c2c78654b8b4c9d8c0db9ffb69eb851e1 100644
@@ -135,6 +135,10 @@ int tdx_vcpu_pre_run(struct kvm_vcpu *vcpu);
 fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit);
 void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
 void tdx_vcpu_put(struct kvm_vcpu *vcpu);
+int tdx_handle_exit(struct kvm_vcpu *vcpu,
+               enum exit_fastpath_completion fastpath);
+void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
+               u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
 
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
 
@@ -168,6 +172,10 @@ static inline fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediat
 }
 static inline void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) {}
 static inline void tdx_vcpu_put(struct kvm_vcpu *vcpu) {}
+static inline int tdx_handle_exit(struct kvm_vcpu *vcpu,
+               enum exit_fastpath_completion fastpath) { return 0; }
+static inline void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1,
+                                    u64 *info2, u32 *intr_info, u32 *error_code) {}
 
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }