[RFC PATCH 18/42] iommu/vt-d: Support of IOMMU_DOMAIN_KVM domain in Intel IOMMU

From: Yan Zhao
Date: Sat Dec 02 2023 - 04:53:24 EST


Add support for the IOMMU_DOMAIN_KVM domain type. Paging structure
allocation/freeing and page mapping/unmapping of this domain are managed
by KVM rather than by the Intel IOMMU driver.

The metadata of a KVM domain's paging structures is read from the
allocation "data" passed in from KVM through IOMMUFD. The format of this
metadata is defined in the arch header "asm/kvm_exported_tdp.h".
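
For reference, the metadata consumed by this patch looks roughly like
the sketch below (fields inferred from their use in this patch; the
authoritative definition lives in "asm/kvm_exported_tdp.h"):

	struct kvm_exported_tdp_meta_vmx {
		enum kvm_tdp_type type;		/* must be KVM_TDP_TYPE_EPT */
		int level;			/* 4- or 5-level EPT */
		int max_huge_page_level;	/* max huge page level KVM may map */
		hpa_t root_hpa;			/* root of the shared EPT paging structures */
	};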

A KVM domain's gaw (guest address width), agaw, pgd, max_addr and max
superpage level are all derived from the paging structure metadata
provided by KVM. Snoop and paging structure coherency are forced to
true.

IOMMU hardware is checked against the requirements of the KVM domain at
domain allocation phase and again at device attachment phase (in a later
patch).
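
The attach-phase check is expected to reuse the same capability test as
the allocation path; a hypothetical sketch (the real code is in a later
patch):

	if (!is_iommu_cap_compatible_to_kvm_domain(dmar_domain, iommu))
		return -EINVAL;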

CONFIG_INTEL_IOMMU_KVM is provided to turn on/off KVM domain support.
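
A minimal sketch of enabling it, assuming the rest of VT-d support is
already configured:

	CONFIG_INTEL_IOMMU=y
	CONFIG_INTEL_IOMMU_KVM=y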

Signed-off-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
---
drivers/iommu/intel/Kconfig | 9 +++
drivers/iommu/intel/Makefile | 1 +
drivers/iommu/intel/iommu.c | 18 ++++-
drivers/iommu/intel/iommu.h | 5 ++
drivers/iommu/intel/kvm.c | 128 +++++++++++++++++++++++++++++++++++
5 files changed, 160 insertions(+), 1 deletion(-)
create mode 100644 drivers/iommu/intel/kvm.c

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index a4a125666293f..78078103d4280 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -108,4 +108,13 @@ config INTEL_IOMMU_PERF_EVENTS
to aid performance tuning and debug. These are available on modern
processors which support Intel VT-d 4.0 and later.

+config INTEL_IOMMU_KVM
+ bool "Support of stage 2 paging structures/mappings managed by KVM"
+ help
+ Selecting this option enables the Intel IOMMU to use paging
+ structures shared from the KVM MMU as the stage 2 paging structures
+ in IOMMU hardware. Page mapping/unmapping and paging structure
+ allocation/freeing of these stage 2 paging structures are managed
+ by the KVM MMU rather than by the Intel IOMMU driver.
+
endif # INTEL_IOMMU
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index 5dabf081a7793..c097bdd6ee13d 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o
+obj-$(CONFIG_INTEL_IOMMU_KVM) += kvm.o
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 924006cda18c5..fcdee40f30ed1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -375,6 +375,15 @@ static inline int domain_type_is_si(struct dmar_domain *domain)
return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
}

+static inline int domain_type_is_kvm(struct dmar_domain *domain)
+{
+#ifdef CONFIG_INTEL_IOMMU_KVM
+ return domain->domain.type == IOMMU_DOMAIN_KVM;
+#else
+ return false;
+#endif
+}
+
static inline int domain_pfn_supported(struct dmar_domain *domain,
unsigned long pfn)
{
@@ -1735,6 +1744,9 @@ static bool first_level_by_default(unsigned int type)
if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
return intel_cap_flts_sanity();

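+ /* A KVM domain shares KVM's stage 2 tables; never use first level */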
+ if (type == IOMMU_DOMAIN_KVM)
+ return false;
+
/* Both levels are available, decide it based on domain type */
return type != IOMMU_DOMAIN_UNMANAGED;
}
@@ -1826,7 +1838,8 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)

static void domain_exit(struct dmar_domain *domain)
{
- if (domain->pgd) {
+ /* The pgd of a KVM domain is managed by KVM, not by this driver */
+ if (!domain_type_is_kvm(domain) && domain->pgd) {
LIST_HEAD(freelist);

domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
@@ -4892,6 +4905,9 @@ const struct iommu_ops intel_iommu_ops = {
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
.domain_alloc_user = intel_iommu_domain_alloc_user,
+#ifdef CONFIG_INTEL_IOMMU_KVM
+ .domain_alloc_kvm = intel_iommu_domain_alloc_kvm,
+#endif
.probe_device = intel_iommu_probe_device,
.probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index c76f558ae6323..8826e9248f6ed 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -1056,4 +1056,9 @@ static inline int width_to_agaw(int width)
return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

+#ifdef CONFIG_INTEL_IOMMU_KVM
+struct iommu_domain *
+intel_iommu_domain_alloc_kvm(struct device *dev, u32 flags, const void *data);
+#endif
+
#endif
diff --git a/drivers/iommu/intel/kvm.c b/drivers/iommu/intel/kvm.c
new file mode 100644
index 0000000000000..188ec90083051
--- /dev/null
+++ b/drivers/iommu/intel/kvm.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/iommu.h>
+#include <asm/kvm_exported_tdp.h>
+#include "iommu.h"
+
+/*
+ * Check IOMMU hardware snoop related caps:
+ *
+ * - force_snooping: the current KVM implementation force snoops CPU caches.
+ * - scalable mode: needed to set the PGSNP bit in the PASID table entry,
+ *   which overrides the SNP bit (bit 11) in stage 2 leaves.
+ * - paging structure coherency: needed because KVM will not call
+ *   clflush_cache_range() on the shared paging structures.
+ */
+static bool is_coherency(struct intel_iommu *iommu)
+{
+ return ecap_sc_support(iommu->ecap) && sm_supported(iommu) &&
+ iommu_paging_structure_coherency(iommu);
+}
+
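+/*
+ * The attaching device's IOMMU must provide cache coherency, cover the
+ * domain's superpage sizes, and support at least the domain's address
+ * width.
+ */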
+static bool is_iommu_cap_compatible_to_kvm_domain(struct dmar_domain *domain,
+ struct intel_iommu *iommu)
+{
+ if (!is_coherency(iommu))
+ return false;
+
+ if (domain->iommu_superpage > fls(cap_super_page_val(iommu->cap)))
+ return false;
+
+ if (domain->agaw > iommu->agaw || domain->gaw > cap_mgaw(iommu->cap))
+ return false;
+
+ return true;
+}
+
+/*
+ * Cache coherency is always enforced in a KVM domain. IOMMU hardware
+ * caps are checked to ensure cache coherency can be enforced before a
+ * device is attached to the KVM domain.
+ */
+static bool kvm_domain_enforce_cache_coherency(struct iommu_domain *domain)
+{
+ return true;
+}
+
+static const struct iommu_domain_ops intel_kvm_domain_ops = {
+ .free = intel_iommu_domain_free,
+ .enforce_cache_coherency = kvm_domain_enforce_cache_coherency,
+};
+
+struct iommu_domain *
+intel_iommu_domain_alloc_kvm(struct device *dev, u32 flags, const void *data)
+{
+ bool request_nest_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
+ const struct kvm_exported_tdp_meta_vmx *tdp = data;
+ struct dmar_domain *dmar_domain;
+ struct iommu_domain *domain;
+ struct intel_iommu *iommu;
+ int adjust_width;
+
+ iommu = device_to_iommu(dev, NULL, NULL);
+ if (!iommu)
+ return ERR_PTR(-ENODEV);
+
+ /*
+ * In theory, a KVM domain can be nested as a parent domain to a user
+ * domain. Disallow it for now as we don't want to handle cases like
+ * IO page faults on a nested domain yet.
+ */
+ if (request_nest_parent) {
+ pr_err("KVM domain does not work as nested parent currently\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (!tdp || tdp->type != KVM_TDP_TYPE_EPT) {
+ pr_err("No meta data or wrong KVM TDP type\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (tdp->level != 4 && tdp->level != 5) {
+ pr_err("Unsupported KVM TDP level %d in IOMMU\n", tdp->level);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ dmar_domain = alloc_domain(IOMMU_DOMAIN_KVM);
+ if (!dmar_domain)
+ return ERR_PTR(-ENOMEM);
+
+ /* A KVM domain must use second-level paging structures, never flpt */
+ WARN_ON(dmar_domain->use_first_level);
+
+ domain = &dmar_domain->domain;
+ domain->ops = &intel_kvm_domain_ops;
+ domain->type = IOMMU_DOMAIN_KVM;
+
+ /* Read dmar domain metadata from "tdp" */
+ dmar_domain->gaw = tdp->level == 4 ? ADDR_WIDTH_4LEVEL : ADDR_WIDTH_5LEVEL;
+ adjust_width = guestwidth_to_adjustwidth(dmar_domain->gaw);
+ dmar_domain->agaw = width_to_agaw(adjust_width);
+ dmar_domain->iommu_superpage = tdp->max_huge_page_level - 1;
+ dmar_domain->max_addr = 1ULL << dmar_domain->gaw;
+ dmar_domain->pgd = phys_to_virt(tdp->root_hpa);
+
+ dmar_domain->nested_parent = false;
+ dmar_domain->dirty_tracking = false;
+
+ /*
+ * force_snooping and paging structure coherency are assumed true in a
+ * KVM domain; the IOMMU hardware caps are checked before device attach.
+ */
+ dmar_domain->force_snooping = true;
+ dmar_domain->iommu_coherency = true;
+
+ /* no need to let iommu_map/unmap see pgsize_bitmap */
+ domain->pgsize_bitmap = 0;
+
+ /* force aperture */
+ domain->geometry.aperture_start = 0;
+ domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
+ domain->geometry.force_aperture = true;
+
+ if (!is_iommu_cap_compatible_to_kvm_domain(dmar_domain, iommu)) {
+ pr_err("Unsupported KVM TDP\n");
+ kfree(dmar_domain);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ return domain;
+}
--
2.17.1