Re: [PATCH 1/7] KVM: TDX: Add a place holder to handle TDX VM exit

From: Binbin Wu
Date: Sun Dec 15 2024 - 19:54:40 EST





On 12/13/2024 4:57 PM, Xiaoyao Li wrote:
On 12/1/2024 11:53 AM, Binbin Wu wrote:

[...]
+
+static int tdx_handle_triple_fault(struct kvm_vcpu *vcpu)
+{
+    vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+    vcpu->mmio_needed = 0;
+    return 0;

This function is just same as handle_triple_fault() in vmx.c, why not use it instead?
Yes, handle_triple_fault() could be moved to vmx.h, and then it can be used
by the TDX code.
Will do so.




  }
    void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
@@ -1135,6 +1215,88 @@ int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
      return tdx_sept_drop_private_spte(kvm, gfn, level, pfn);
  }
  +int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
+{
+    struct vcpu_tdx *tdx = to_tdx(vcpu);
+    u64 vp_enter_ret = tdx->vp_enter_ret;
+    union vmx_exit_reason exit_reason;
+
+    if (fastpath != EXIT_FASTPATH_NONE)
+        return 1;
+
+    /*
+     * Handle TDX SW errors, including TDX_SEAMCALL_UD, TDX_SEAMCALL_GP and
+     * TDX_SEAMCALL_VMFAILINVALID.
+     */
+    if (unlikely((vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR)) {
+        KVM_BUG_ON(!kvm_rebooting, vcpu->kvm);
+        goto unhandled_exit;
+    }
+
+    /*
+     * Without off-TD debug enabled, failed_vmentry case must have
+     * TDX_NON_RECOVERABLE set.
+     */

This comment is confusing. I'm not sure why it is put here. Below code does nothing with exit_reason.failed_vmentry.

Because when failed_vmentry occurs, vp_enter_ret will have
TDX_NON_RECOVERABLE set, so it will be handled below.


+    if (unlikely(vp_enter_ret & (TDX_ERROR | TDX_NON_RECOVERABLE))) {
+        /* Triple fault is non-recoverable. */
+        if (unlikely(tdx_check_exit_reason(vcpu, EXIT_REASON_TRIPLE_FAULT)))
+            return tdx_handle_triple_fault(vcpu);
+
+        kvm_pr_unimpl("TD vp_enter_ret 0x%llx, hkid 0x%x hkid pa 0x%llx\n",
+                  vp_enter_ret, to_kvm_tdx(vcpu->kvm)->hkid,
+                  set_hkid_to_hpa(0, to_kvm_tdx(vcpu->kvm)->hkid));

The need for "hkid" and "hkid pa" indeed requires clarification. Especially the "hkid pa", which is the result of applying the HKID of the current TD to physical address 0. I cannot think of any reason why we need such info.
Yes, set_hkid_to_hpa(0, to_kvm_tdx(vcpu->kvm)->hkid) should be removed.
I didn't notice it.
Thanks!



+        goto unhandled_exit;
+    }
+
+    /* From now, the seamcall status should be TDX_SUCCESS. */
+    WARN_ON_ONCE((vp_enter_ret & TDX_SEAMCALL_STATUS_MASK) != TDX_SUCCESS);

Is there any case where TDX_SUCCESS comes with additional non-zero information in the lower 32 bits? I thought TDX_SUCCESS is a whole 64-bit status code.
TDX status code uses the upper 32-bits.

When the status code is TDX_SUCCESS and has a valid VMX exit reason, the lower
32 bits hold the VMX exit reason.

You can refer to the TDX module ABI spec or interface_function_completion_status.json
from the intel-tdx-module-1.5-abi-table for details.



+    exit_reason = tdexit_exit_reason(vcpu);
+
+    switch (exit_reason.basic) {
+    default:
+        break;
+    }
+
+unhandled_exit:
+    vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+    vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+    vcpu->run->internal.ndata = 2;
+    vcpu->run->internal.data[0] = vp_enter_ret;
+    vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+    return 0;
+}
+
+void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
+        u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code)
+{
+    struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+    if (tdx_has_exit_reason(vcpu)) {
+        /*
+         * Encode some useful info from the the 64 bit return code
+         * into the 32 bit exit 'reason'. If the VMX exit reason is
+         * valid, just set it to those bits.
+         */
+        *reason = (u32)tdx->vp_enter_ret;
+        *info1 = tdexit_exit_qual(vcpu);
+        *info2 = tdexit_ext_exit_qual(vcpu);
+    } else {
+        /*
+         * When the VMX exit reason in vp_enter_ret is not valid,
+         * overload the VMX_EXIT_REASONS_FAILED_VMENTRY bit (31) to
+         * mean the vmexit code is not valid. Set the other bits to
+         * try to avoid picking a value that may someday be a valid
+         * VMX exit code.
+         */
+        *reason = 0xFFFFFFFF;
+        *info1 = 0;
+        *info2 = 0;
+    }
+
+    *intr_info = tdexit_intr_info(vcpu);
+    *error_code = 0;
+}
+
  static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
  {
      const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf;
diff --git a/arch/x86/kvm/vmx/tdx_errno.h b/arch/x86/kvm/vmx/tdx_errno.h
index f9dbb3a065cc..6ff4672c4181 100644
--- a/arch/x86/kvm/vmx/tdx_errno.h
+++ b/arch/x86/kvm/vmx/tdx_errno.h
@@ -10,6 +10,9 @@
   * TDX SEAMCALL Status Codes (returned in RAX)
   */
  #define TDX_NON_RECOVERABLE_VCPU        0x4000000100000000ULL
+#define TDX_NON_RECOVERABLE_TD            0x4000000200000000ULL
+#define TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE 0x6000000500000000ULL
+#define TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE 0x6000000700000000ULL

Not the fault of this patch.

There are other status codes defined in arch/x86/include/asm/tdx.h

  /*
   * TDX module SEAMCALL leaf function error codes
   */
  #define TDX_SUCCESS        0ULL
  #define TDX_RND_NO_ENTROPY    0x8000020300000000ULL

It's better to put them in one single place.
Agree.

Thanks!

  #define TDX_INTERRUPTED_RESUMABLE 0x8000000300000000ULL
  #define TDX_OPERAND_INVALID            0xC000010000000000ULL
  #define TDX_OPERAND_BUSY            0x8000020000000000ULL
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 02b33390e1bf..1c18943e0e1d 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -133,6 +133,10 @@ int tdx_vcpu_pre_run(struct kvm_vcpu *vcpu);
  fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit);
  void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
  void tdx_vcpu_put(struct kvm_vcpu *vcpu);
+int tdx_handle_exit(struct kvm_vcpu *vcpu,
+        enum exit_fastpath_completion fastpath);
+void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
+        u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
    int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
  @@ -167,6 +171,10 @@ static inline fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediat
  }
  static inline void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) {}
  static inline void tdx_vcpu_put(struct kvm_vcpu *vcpu) {}
+static inline int tdx_handle_exit(struct kvm_vcpu *vcpu,
+        enum exit_fastpath_completion fastpath) { return 0; }
+static inline void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1,
+                     u64 *info2, u32 *intr_info, u32 *error_code) {}
    static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }