[PATCH 01/14] KVM: arm64: Donate MMIO to the hypervisor
From: Sebastian Ene
Date: Tue Mar 10 2026 - 09:53:10 EST
From: Mostafa Saleh <smostafa@xxxxxxxxxx>
Add a function to donate MMIO to the hypervisor so IOMMU hypervisor
drivers can use that to protect the MMIO of IOMMU.
The initial attempt to implement this was to add a new flag to
"__pkvm_host_donate_hyp" to accept MMIO. However, that had many problems:
it was quite intrusive for host/hyp to check/set page state to make it
aware of MMIO and to encode the state in the page table in that case,
and these checks are called in paths that can be sensitive to
performance (FFA, VMs, ...).
As donating MMIO is very rare, and we don’t need to encode the full
state, it’s reasonable to have a separate function to do this.
It will init the host s2 page table with an invalid leaf with the owner ID
to prevent the host from mapping the page on faults.
Also, prevent kvm_pgtable_stage2_unmap() from removing the owner ID from
stage-2 PTEs, as this can be triggered from recycle logic under memory
pressure. There is no code relying on this, as all ownership changes are
done via kvm_pgtable_stage2_set_owner().
For error path in IOMMU drivers, add a function to donate MMIO back
from hyp to host.
Signed-off-by: Mostafa Saleh <smostafa@xxxxxxxxxx>
---
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +
arch/arm64/kvm/hyp/nvhe/mem_protect.c | 90 +++++++++++++++++++
arch/arm64/kvm/hyp/pgtable.c | 9 +-
3 files changed, 94 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 5f9d56754e39..8b617e6fc0e0 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -31,6 +31,8 @@ enum pkvm_component_id {
};
extern unsigned long hyp_nr_cpus;
+int __pkvm_host_donate_hyp_mmio(u64 pfn); /* host -> hyp donation of one non-memory (MMIO) page */
+int __pkvm_hyp_donate_host_mmio(u64 pfn); /* hyp -> host return of a previously donated MMIO page */
int __pkvm_prot_finalize(void);
int __pkvm_host_share_hyp(u64 pfn);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 38f66a56a766..0808367c52e5 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -784,6 +784,96 @@ int __pkvm_host_unshare_hyp(u64 pfn)
return ret;
}
+int __pkvm_host_donate_hyp_mmio(u64 pfn) /* donate one non-memory (MMIO) page from host to hyp; 0 or -errno */
+{
+ u64 phys = hyp_pfn_to_phys(pfn);
+ void *virt = __hyp_va(phys);
+ int ret;
+ kvm_pte_t pte;
+
+ if (addr_is_memory(phys)) /* only non-memory addresses are accepted here */
+ return -EINVAL;
+
+ host_lock_component(); /* host lock is taken first, then the hyp lock */
+ hyp_lock_component();
+
+ ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, &pte, NULL);
+ if (ret)
+ goto unlock;
+
+ if (pte && !kvm_pte_valid(pte)) { /* host PTE must be valid or empty; non-zero invalid is refused */
+ ret = -EPERM;
+ goto unlock;
+ }
+
+ ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
+ if (ret)
+ goto unlock;
+ if (pte) { /* the hyp leaf for this VA must still be empty */
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, PAGE_HYP_DEVICE); /* map exactly one page into hyp */
+ if (ret)
+ goto unlock;
+ /*
+ * We set HYP as the owner of the MMIO pages in the host stage-2, for:
+ * - host aborts: host_stage2_adjust_range() would fail for invalid non-zero PTEs.
+ * - recycle under memory pressure: host_stage2_unmap_dev_all() would call
+ * kvm_pgtable_stage2_unmap() which will not clear non-zero invalid PTEs (counted).
+ * - other MMIO donation: would fail as we check that the PTE is valid or empty.
+ */
+ WARN_ON(host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, phys,
+ PAGE_SIZE, &host_s2_pool, PKVM_ID_HYP)); /* failure is only WARNed, it is not stored in ret */
+unlock:
+ hyp_unlock_component(); /* released in reverse order of acquisition */
+ host_unlock_component();
+
+ return ret;
+}
+
+int __pkvm_hyp_donate_host_mmio(u64 pfn) /* give one hyp-owned MMIO page back to the host; 0 or -errno */
+{
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 virt = (u64)__hyp_va(phys);
+ size_t size = PAGE_SIZE;
+ int ret;
+ kvm_pte_t pte;
+
+ if (addr_is_memory(phys)) /* only non-memory addresses are accepted here */
+ return -EINVAL;
+
+ host_lock_component(); /* host lock is taken first, then the hyp lock */
+ hyp_lock_component();
+
+ ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
+ if (ret)
+ goto unlock;
+ if (!kvm_pte_valid(pte)) { /* the page is not currently mapped in hyp */
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, &pte, NULL); /* NOTE(review): owner is read below without a kvm_pte_valid() check on this PTE - confirm */
+ if (ret)
+ goto unlock;
+
+ if (FIELD_GET(KVM_INVALID_PTE_OWNER_MASK, pte) != PKVM_ID_HYP) { /* host stage-2 must record hyp as owner */
+ ret = -EPERM;
+ goto unlock;
+ }
+
+ WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size); /* the whole page must have been unmapped */
+ WARN_ON(host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, phys,
+ PAGE_SIZE, &host_s2_pool, PKVM_ID_HOST)); /* failure is only WARNed, it is not stored in ret */
+unlock:
+ hyp_unlock_component(); /* released in reverse order of acquisition */
+ host_unlock_component();
+
+ return ret;
+}
+
int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
u64 phys = hyp_pfn_to_phys(pfn);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 9b480f947da2..d954058e63ff 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1152,13 +1152,8 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
kvm_pte_t *childp = NULL;
bool need_flush = false;
- if (!kvm_pte_valid(ctx->old)) {
- if (stage2_pte_is_counted(ctx->old)) {
- kvm_clear_pte(ctx->ptep);
- mm_ops->put_page(ctx->ptep);
- }
- return 0;
- }
+ if (!kvm_pte_valid(ctx->old))
+ return stage2_pte_is_counted(ctx->old) ? -EPERM : 0;
if (kvm_pte_table(ctx->old, ctx->level)) {
childp = kvm_pte_follow(ctx->old, mm_ops);
--
2.53.0.473.g4a7958ca14-goog