+
+static void tdx_reclaim_control_page(unsigned long td_page_pa)
+{
+ WARN_ON_ONCE(!td_page_pa);
+
+ /*
+ * TDCX are being reclaimed. TDX module maps TDCX with HKID
+ * assigned to the TD. Here the cache associated to the TD
+ * was already flushed by TDH.PHYMEM.CACHE.WB before here, So
+ * cache doesn't need to be flushed again.
+ */
+ if (tdx_reclaim_page(td_page_pa))
+ /*
+ * Leak the page on failure:
+ * tdx_reclaim_page() returns an error if and only if there's an
+ * unexpected, fatal error, e.g. a SEAMCALL with bad params,
+ * incorrect concurrency in KVM, a TDX Module bug, etc.
+ * Retrying at a later point is highly unlikely to be
+ * successful.
+ * No log here as tdx_reclaim_page() already did.
+ */
+ return;
+ free_page((unsigned long)__va(td_page_pa));
+}
+
+static void tdx_do_tdh_phymem_cache_wb(void *unused)
+{
+ u64 err = 0;
+
+ do {
+ err = tdh_phymem_cache_wb(!!err);
+ } while (err == TDX_INTERRUPTED_RESUMABLE);
+
+ /* Other thread may have done for us. */
+ if (err == TDX_NO_HKID_READY_TO_WBCACHE)
+ err = TDX_SUCCESS;
+ if (WARN_ON_ONCE(err))
+ pr_tdx_error(TDH_PHYMEM_CACHE_WB, err, NULL);
+}
+
+void tdx_mmu_release_hkid(struct kvm *kvm)
+{
+ bool packages_allocated, targets_allocated;
+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+ cpumask_var_t packages, targets;
+ u64 err;
+ int i;
+
+ if (!is_hkid_assigned(kvm_tdx))
+ return;
+
+ if (!is_td_created(kvm_tdx)) {
+ tdx_hkid_free(kvm_tdx);
+ return;
+ }
+
+ packages_allocated = zalloc_cpumask_var(&packages, GFP_KERNEL);
+ targets_allocated = zalloc_cpumask_var(&targets, GFP_KERNEL);
+ cpus_read_lock();
+
+ /*
+ * We can destroy multiple guest TDs simultaneously. Prevent
+ * tdh_phymem_cache_wb from returning TDX_BUSY by serialization.
+ */
+ mutex_lock(&tdx_lock);
+
+ /*
+ * Go through multiple TDX HKID state transitions with three SEAMCALLs
+ * to make TDH.PHYMEM.PAGE.RECLAIM() usable.
+ * to other functions to operate private pages and Secure-EPT pages.
+ *
+ * Avoid race for kvm_gmem_release() to call kvm_mmu_unmap_gfn_range().
+ * This function is called via mmu notifier, mmu_release().
+ * kvm_gmem_release() is called via fput() on process exit.
+ */
+ write_lock(&kvm->mmu_lock);
+
+ for_each_online_cpu(i) {
+ if (packages_allocated &&
+ cpumask_test_and_set_cpu(topology_physical_package_id(i),
+ packages))
+ continue;
+ if (targets_allocated)
+ cpumask_set_cpu(i, targets);
+ }
+ if (targets_allocated)
+ on_each_cpu_mask(targets, tdx_do_tdh_phymem_cache_wb, NULL, true);
+ else
+ on_each_cpu(tdx_do_tdh_phymem_cache_wb, NULL, true);
+ /*
+ * In the case of error in tdx_do_tdh_phymem_cache_wb(), the following
+ * tdh_mng_key_freeid() will fail.
+ */
+ err = tdh_mng_key_freeid(kvm_tdx->tdr_pa);
+ if (WARN_ON_ONCE(err)) {
+ pr_tdx_error(TDH_MNG_KEY_FREEID, err, NULL);
+ pr_err("tdh_mng_key_freeid() failed. HKID %d is leaked.\n",
+ kvm_tdx->hkid);
+ } else
+ tdx_hkid_free(kvm_tdx);
+
+ write_unlock(&kvm->mmu_lock);
+ mutex_unlock(&tdx_lock);
+ cpus_read_unlock();
+ free_cpumask_var(targets);
+ free_cpumask_var(packages);
+}
+
+void tdx_vm_free(struct kvm *kvm)
+{
+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+ u64 err;
+ int i;
+
+ /*
+ * tdx_mmu_release_hkid() failed to reclaim HKID. Something went wrong
+ * heavily with TDX module. Give up freeing TD pages. As the function
+ * already warned, don't warn it again.
+ */
+ if (is_hkid_assigned(kvm_tdx))
+ return;
+
+ if (kvm_tdx->tdcs_pa) {
+ for (i = 0; i < tdx_info->nr_tdcs_pages; i++) {
+ if (kvm_tdx->tdcs_pa[i])
+ tdx_reclaim_control_page(kvm_tdx->tdcs_pa[i]);
+ }
+ kfree(kvm_tdx->tdcs_pa);
+ kvm_tdx->tdcs_pa = NULL;
+ }
+
+ if (!kvm_tdx->tdr_pa)
+ return;
+ if (__tdx_reclaim_page(kvm_tdx->tdr_pa))
+ return;
+ /*
+ * TDX module maps TDR with TDX global HKID. TDX module may access TDR
+ * while operating on TD (Especially reclaiming TDCS). Cache flush with > + * TDX global HKID is needed.
+ */
+ err = tdh_phymem_page_wbinvd(set_hkid_to_hpa(kvm_tdx->tdr_pa,
+ tdx_global_keyid));
+ if (WARN_ON_ONCE(err)) {
+ pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
+ return;
+ }
+ tdx_clear_page(kvm_tdx->tdr_pa);
+
+ free_page((unsigned long)__va(kvm_tdx->tdr_pa));
+ kvm_tdx->tdr_pa = 0;
+}
+
+static int tdx_do_tdh_mng_key_config(void *param)
+{
+ hpa_t *tdr_p = param;
+ u64 err;
+
+ do {
+ err = tdh_mng_key_config(*tdr_p);
+
+ /*
+ * If it failed to generate a random key, retry it because this
+ * is typically caused by an entropy error of the CPU's random
+ * number generator.
+ */
+ } while (err == TDX_KEY_GENERATION_FAILED);
+
+ if (WARN_ON_ONCE(err)) {
+ pr_tdx_error(TDH_MNG_KEY_CONFIG, err, NULL);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int __tdx_td_init(struct kvm *kvm);
+
+int tdx_vm_init(struct kvm *kvm)
+{
+ /*
+ * TDX has its own limit of the number of vcpus in addition to
+ * KVM_MAX_VCPUS.
+ */
+ kvm->max_vcpus = min(kvm->max_vcpus, TDX_MAX_VCPUS);
+
+ /* Place holder for TDX specific logic. */
+ return __tdx_td_init(kvm);
+}
+