Re: [PATCH 03/14] KVM: arm64: Support host MMIO trap handlers for unmapped devices
From: Fuad Tabba
Date: Fri Mar 13 2026 - 05:35:23 EST
Hi Sebastian,
On Tue, 10 Mar 2026 at 12:49, Sebastian Ene <sebastianene@xxxxxxxxxx> wrote:
>
> Introduce a mechanism to register callbacks for MMIO accesses to regions
> unmapped from the host Stage-2 page tables.
>
> This infrastructure allows the hypervisor to intercept host accesses to
> protected or emulated devices. When a Stage-2 fault occurs on a
> registered device region, the hypervisor will invoke the associated
> callback to emulate the access.
>
> Signed-off-by: Sebastian Ene <sebastianene@xxxxxxxxxx>
> ---
> arch/arm64/include/asm/kvm_arm.h | 3 ++
> arch/arm64/include/asm/kvm_pkvm.h | 6 ++++
> arch/arm64/kvm/hyp/nvhe/mem_protect.c | 41 +++++++++++++++++++++++++++
> arch/arm64/kvm/hyp/nvhe/setup.c | 3 ++
> 4 files changed, 53 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 3f9233b5a130..8fe1e80ab3f4 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -304,6 +304,9 @@
>
> /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
> #define HPFAR_MASK (~UL(0xf))
> +
> +#define FAR_MASK GENMASK_ULL(11, 0)
> +
> /*
> * We have
> * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12]
> diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
> index 48ec7d519399..5321ced2f50a 100644
> --- a/arch/arm64/include/asm/kvm_pkvm.h
> +++ b/arch/arm64/include/asm/kvm_pkvm.h
> @@ -19,9 +19,15 @@
>
> #define PKVM_PROTECTED_REGS_NUM 8
>
> +struct pkvm_protected_reg;
> +
> +typedef void (pkvm_emulate_handler)(struct pkvm_protected_reg *region, u64 offset, bool write,
> + u64 *reg, u8 reg_size);
> +
> struct pkvm_protected_reg {
> u64 start_pfn;
> size_t num_pages;
> + pkvm_emulate_handler *cb;
> };
>
> extern struct pkvm_protected_reg kvm_nvhe_sym(pkvm_protected_regs)[];
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 7c125836b533..f405d2fbd88f 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -13,6 +13,7 @@
> #include <asm/stage2_pgtable.h>
>
> #include <hyp/fault.h>
> +#include <hyp/adjust_pc.h>
Please sort includes alphabetically.
>
> #include <nvhe/gfp.h>
> #include <nvhe/memory.h>
> @@ -608,6 +609,41 @@ static int host_stage2_idmap(u64 addr)
> return ret;
> }
>
> +static bool handle_host_mmio_trap(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr)
> +{
> + u64 offset, reg_value = 0, start, end;
> + u8 reg_size, reg_index;
> + bool write;
> + int i;
What do you plan to do if there is no valid syndrome, i.e.,
ESR_EL2.ISV == 0? I am still reviewing, so maybe this is solved in a
future patch, or maybe you know that, in practice, all instructions
would have a valid syndrome. Regardless of which it is, you should
definitely add the following check to _this_ patch (or reconsider the
approach if it is possible to get legit accesses with ESR_EL2.ISV ==
0):
+ if (!(esr & ESR_ELx_ISV))
+ return false;
> +
> + for (i = 0; i < num_protected_reg; i++) {
> + start = pkvm_protected_regs[i].start_pfn << PAGE_SHIFT;
> + end = start + (pkvm_protected_regs[i].num_pages << PAGE_SHIFT);
> +
> + if (start > addr || addr > end)
Because end is calculated by adding the size, it represents the first
byte after the region, so this should be:
+ if (start > addr || addr >= end)
> + continue;
You also need to make sure that the entire access fits within the
protected region, to avoid a malicious or misaligned cross-boundary
access, i.e.:
+ if (addr + reg_size > end)
+ return false;
> + reg_size = BIT((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
> + reg_index = (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
> + write = (esr & ESR_ELx_WNR) == ESR_ELx_WNR;
> + offset = addr - start;
> +
> + if (write)
> + reg_value = host_ctxt->regs.regs[reg_index];
You need to handle the zero register (index 31) for writes, e.g.:
+ reg_value = (reg_index == 31) ? 0 : host_ctxt->regs.regs[reg_index];
> +
> + pkvm_protected_regs[i].cb(&pkvm_protected_regs[i], offset, write,
> + ®_value, reg_size);
> +
> + if (!write)
> + host_ctxt->regs.regs[reg_index] = reg_value;
and for reads:
+ if (!write && reg_index != 31)
Cheers,
/fuad
> +
> + kvm_skip_host_instr();
> + return true;
> + }
> +
> + return false;
> +}
> +
> void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
> {
> struct kvm_vcpu_fault_info fault;
> @@ -630,6 +666,11 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
> */
> BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
> addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
> + addr |= fault.far_el2 & FAR_MASK;
> +
> + if (ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_LOW && !addr_is_memory(addr) &&
> + handle_host_mmio_trap(host_ctxt, esr, addr))
> + return;
>
> ret = host_stage2_idmap(addr);
> BUG_ON(ret && ret != -EAGAIN);
> diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
> index ad5b96085e1b..f91dfebe9980 100644
> --- a/arch/arm64/kvm/hyp/nvhe/setup.c
> +++ b/arch/arm64/kvm/hyp/nvhe/setup.c
> @@ -296,6 +296,9 @@ static int unmap_protected_regions(void)
> if (ret)
> goto err_setup;
> }
> +
> + if (reg->cb)
> + reg->cb = kern_hyp_va(reg->cb);
> }
>
> return 0;
> --
> 2.53.0.473.g4a7958ca14-goog
>