[PATCH v1 4/4] iommu/hyperv: Add page-selective IOTLB flush support

From: Yu Zhang

Date: Mon May 11 2026 - 12:55:32 EST


Add page-selective IOTLB flush using HVCALL_FLUSH_DEVICE_DOMAIN_LIST.
This hypercall accepts a list of (page_number, page_mask_shift) entries,
enabling finer-grained IOTLB invalidation compared to the domain-wide
HVCALL_FLUSH_DEVICE_DOMAIN used by hv_iommu_flush_iotlb_all().

hv_iommu_fill_iova_list() decomposes a contiguous IOVA range into a
minimal set of naturally aligned power-of-two regions, one list entry
per region. When the entries do not all fit in a single hypercall input
page, the code automatically falls back to a full domain flush.

Signed-off-by: Yu Zhang <zhangyu1@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Easwar Hariharan <easwar.hariharan@xxxxxxxxxxxxxxxxxxx>
---
drivers/iommu/hyperv/iommu.c | 91 +++++++++++++++++++++++++++++++++++-
include/hyperv/hvgdk_mini.h | 1 +
include/hyperv/hvhdk_mini.h | 17 +++++++
3 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/hyperv/iommu.c b/drivers/iommu/hyperv/iommu.c
index e5fc625314b5..3bca362b7815 100644
--- a/drivers/iommu/hyperv/iommu.c
+++ b/drivers/iommu/hyperv/iommu.c
@@ -486,10 +486,98 @@ static void hv_iommu_flush_iotlb_all(struct iommu_domain *domain)
hv_flush_device_domain(to_hv_iommu_domain(domain));
}

+/*
+ * Max number of iova_list entries in a single hypercall input page: the
+ * space left in HV_HYP_PAGE_SIZE after the fixed header of
+ * struct hv_input_flush_device_domain_list, divided by the entry size.
+ */
+#define HV_IOMMU_MAX_FLUSH_VA_COUNT \
+ ((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_flush_device_domain_list)) / \
+ sizeof(union hv_iommu_flush_va))
+
+/*
+ * Sentinel returned by hv_iommu_fill_iova_list() when the range needs more
+ * than HV_IOMMU_MAX_FLUSH_VA_COUNT entries. It is only distinguishable from
+ * a real count while HV_IOMMU_MAX_FLUSH_VA_COUNT stays below U16_MAX.
+ */
+#define HV_IOMMU_FLUSH_VA_OVERFLOW U16_MAX
+
+/**
+ * hv_iommu_fill_iova_list() - Split an IOVA range into flush-list entries
+ * @iova_list: Output array with room for HV_IOMMU_MAX_FLUSH_VA_COUNT entries.
+ * @start: First byte of the range.
+ * @end: Last byte of the range, inclusive (the struct iommu_iotlb_gather
+ *       convention).
+ *
+ * Each entry describes a naturally aligned block of 2^page_mask_shift pages
+ * starting at page_number. At every step the largest block that is both
+ * aligned at the current PFN and no larger than the remaining page count is
+ * emitted.
+ *
+ * Return: The number of entries written, 0 if the range is empty
+ * (@start > @end), or HV_IOMMU_FLUSH_VA_OVERFLOW if more than
+ * HV_IOMMU_MAX_FLUSH_VA_COUNT entries would be needed.
+ */
+static inline u16 hv_iommu_fill_iova_list(union hv_iommu_flush_va *iova_list,
+					  unsigned long start,
+					  unsigned long end)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages;
+	u16 count = 0;
+
+	/*
+	 * A gather that never had a range added has start > end. Without
+	 * this check the page count below would wrap around.
+	 */
+	if (start > end)
+		return 0;
+
+	/*
+	 * @end is inclusive, so the last page to flush is end >> PAGE_SHIFT.
+	 * Deriving the count from the last PFN (instead of PAGE_ALIGN(end))
+	 * keeps the final page when @end is page aligned and cannot wrap
+	 * when @end is within a page of ULONG_MAX.
+	 */
+	nr_pages = (end >> PAGE_SHIFT) - start_pfn + 1;
+
+	while (nr_pages) {
+		unsigned long pfn_align, size_align, order;
+
+		if (count >= HV_IOMMU_MAX_FLUSH_VA_COUNT)
+			return HV_IOMMU_FLUSH_VA_OVERFLOW;
+
+		/* PFN 0 is aligned to every power of two. */
+		pfn_align = start_pfn ? __ffs(start_pfn) : BITS_PER_LONG - 1;
+		/* Largest power of two not exceeding the remaining pages. */
+		size_align = __fls(nr_pages);
+		order = min(pfn_align, size_align);
+
+		iova_list[count].page_mask_shift = order;
+		iova_list[count].page_number = start_pfn;
+		count++;
+
+		start_pfn += 1UL << order;
+		nr_pages -= 1UL << order;
+	}
+
+	return count;
+}
+
+/**
+ * hv_flush_device_domain_list() - Flush the IOTLB for a gathered IOVA range
+ * @hv_domain: Domain whose IOTLB entries are invalidated.
+ * @iotlb_gather: Gather structure; its start/end (inclusive) give the range.
+ *
+ * Builds the region list in the per-CPU hypercall input page and issues
+ * HVCALL_FLUSH_DEVICE_DOMAIN_LIST. If the range cannot be expressed as a
+ * usable list, the whole domain is flushed via hv_flush_device_domain()
+ * instead. A gather that holds no range is a no-op.
+ */
+static void hv_flush_device_domain_list(struct hv_iommu_domain *hv_domain,
+					struct iommu_iotlb_gather *iotlb_gather)
+{
+	struct hv_input_flush_device_domain_list *input;
+	unsigned long flags;
+	u64 status;
+	u16 count;
+
+	/* A gather with no range added has start > end: nothing to flush. */
+	if (iotlb_gather->start > iotlb_gather->end)
+		return;
+
+	/* The per-CPU input page must not be reused until the call is done. */
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	memset(input, 0, sizeof(*input));
+	input->device_domain = hv_domain->device_domain;
+	input->flags |= HV_FLUSH_DEVICE_DOMAIN_LIST_IOMMU_FORMAT;
+	count = hv_iommu_fill_iova_list(input->iova_list,
+					iotlb_gather->start,
+					iotlb_gather->end);
+
+	if (!count || count == HV_IOMMU_FLUSH_VA_OVERFLOW) {
+		/*
+		 * Either the range needs more entries than one hypercall
+		 * page holds, or no entry was produced for a non-empty
+		 * gather. Be conservative and flush the whole domain; the
+		 * helper sets up its own input and reports its own errors.
+		 */
+		local_irq_restore(flags);
+		hv_flush_device_domain(hv_domain);
+		return;
+	}
+
+	status = hv_do_rep_hypercall(HVCALL_FLUSH_DEVICE_DOMAIN_LIST,
+				     count, 0, input, NULL);
+
+	local_irq_restore(flags);
+
+	/* status is an unsigned hypercall result; print it in hex. */
+	if (!hv_result_success(status))
+		pr_err("HVCALL_FLUSH_DEVICE_DOMAIN_LIST failed, status %#llx\n",
+		       status);
+}
+
static void hv_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather)
{
- hv_flush_device_domain(to_hv_iommu_domain(domain));
+ /*
+  * Flush the gathered IOVA range; the helper falls back to a full
+  * domain flush when the range does not fit in one hypercall page.
+  */
+ hv_flush_device_domain_list(to_hv_iommu_domain(domain), iotlb_gather);

iommu_put_pages_list(&iotlb_gather->freelist);
}
@@ -543,6 +631,7 @@ static struct iommu_domain *hv_iommu_domain_alloc_paging(struct device *dev)

cfg.common.hw_max_vasz_lg2 = hv_iommu_device->max_iova_width;
cfg.common.hw_max_oasz_lg2 = 52;
+ cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE);
cfg.top_level = (hv_iommu_device->max_iova_width > 48) ? 4 : 3;

ret = pt_iommu_x86_64_init(&hv_domain->pt_iommu_x86_64, &cfg, GFP_KERNEL);
diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 5bdbb44da112..eaaf87171478 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -496,6 +496,7 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */
#define HVCALL_GET_GPA_PAGES_ACCESS_STATES 0x00c9
#define HVCALL_CONFIGURE_DEVICE_DOMAIN 0x00ce
#define HVCALL_FLUSH_DEVICE_DOMAIN 0x00d0
+#define HVCALL_FLUSH_DEVICE_DOMAIN_LIST 0x00d1
#define HVCALL_ACQUIRE_SPARSE_SPA_PAGE_HOST_ACCESS 0x00d7
#define HVCALL_RELEASE_SPARSE_SPA_PAGE_HOST_ACCESS 0x00d8
#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h
index 493608e791b4..f51d5d9467f1 100644
--- a/include/hyperv/hvhdk_mini.h
+++ b/include/hyperv/hvhdk_mini.h
@@ -671,4 +671,21 @@ struct hv_input_flush_device_domain {
u32 reserved;
} __packed;

+/*
+ * One entry of the HVCALL_FLUSH_DEVICE_DOMAIN_LIST input list: a 64-bit
+ * value that can be accessed whole (iova) or through its two bit-fields.
+ */
+union hv_iommu_flush_va {
+ u64 iova;
+ struct {
+ /* log2 of the number of pages covered by this entry */
+ u64 page_mask_shift : 12;
+ /* page frame number of the first page of the region */
+ u64 page_number : 52;
+ };
+} __packed;
+
+
+/*
+ * Input for HVCALL_FLUSH_DEVICE_DOMAIN_LIST: a fixed header followed by a
+ * variable-length list of regions to flush.
+ */
+struct hv_input_flush_device_domain_list {
+ /* domain whose IOTLB entries are flushed */
+ struct hv_input_device_domain device_domain;
+#define HV_FLUSH_DEVICE_DOMAIN_LIST_IOMMU_FORMAT (1 << 0)
+ /* HV_FLUSH_DEVICE_DOMAIN_LIST_* bits */
+ u32 flags;
+ u32 reserved;
+ /* one entry per region; the entry count is passed as the rep count */
+ union hv_iommu_flush_va iova_list[];
+} __packed;
+
#endif /* _HV_HVHDK_MINI_H */
--
2.52.0