Re: [PATCH 4/6] KVM: arm64: Move hyp state to hyp_vmemmap
From: Marc Zyngier
Date: Fri Mar 14 2025 - 07:31:52 EST
On Thu, 27 Feb 2025 00:33:08 +0000,
Quentin Perret <qperret@xxxxxxxxxx> wrote:
>
> Tracking the hypervisor's ownership state in struct hyp_page has
> several benefits, including allowing far more efficient lookups (no
> page-table walk needed) and decorrelating the state from the presence
> of a mapping. This will later allow pages to be mapped into EL2
> stage-1 less proactively, which is generally a good thing for
> security. And in the future this will help with tracking the state of
> pages mapped into the hypervisor's private range without requiring an
> alias into the 'linear map' range.
>
> Signed-off-by: Quentin Perret <qperret@xxxxxxxxxx>
> ---
> arch/arm64/kvm/hyp/include/nvhe/memory.h | 20 +++++++++-
> arch/arm64/kvm/hyp/nvhe/mem_protect.c | 51 ++++++++++++------------
> arch/arm64/kvm/hyp/nvhe/setup.c | 6 ++-
> 3 files changed, 49 insertions(+), 28 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
> index 4a3c55d26ef3..cc4c01158368 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
> @@ -22,6 +22,7 @@ enum pkvm_page_state {
> /* Meta-states which aren't encoded directly in the PTE's SW bits */
> PKVM_NOPAGE = BIT(0) | BIT(1),
> };
> +#define PKVM_PAGE_STATE_MASK (BIT(0) | BIT(1))
>
> #define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
> static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
> @@ -42,7 +43,14 @@ struct hyp_page {
> u8 order;
>
> /* Host (non-meta) state. Guarded by the host stage-2 lock. */
> - unsigned __host_state : 8;
> + unsigned __host_state : 4;
> +
> + /*
> + * Complement of the hyp (non-meta) state. Guarded by the hyp stage-1 lock. We use the
> + * complement so that the initial 0 in __hyp_state_comp (due to the entire vmemmap starting
> + * off zeroed) encodes PKVM_NOPAGE.
> + */
> + unsigned __hyp_state_comp : 4;
>
> u32 host_share_guest_count;
> };
> @@ -89,6 +97,16 @@ static inline void set_host_state(phys_addr_t phys, enum pkvm_page_state state)
> hyp_phys_to_page(phys)->__host_state = state;
> }
>
> +static inline enum pkvm_page_state get_hyp_state(phys_addr_t phys)
> +{
> + return hyp_phys_to_page(phys)->__hyp_state_comp ^ PKVM_PAGE_STATE_MASK;
> +}
> +
> +static inline void set_hyp_state(phys_addr_t phys, enum pkvm_page_state state)
> +{
> + hyp_phys_to_page(phys)->__hyp_state_comp = state ^ PKVM_PAGE_STATE_MASK;
> +}
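nit: for my own sanity I convinced myself that the complement trick
works with a userspace toy program. Note that the non-NOPAGE enum
values below are from my memory of pkvm_page_state, so take them with
a pinch of salt:

#include <assert.h>

#define BIT(n)			(1U << (n))
#define PKVM_PAGE_STATE_MASK	(BIT(0) | BIT(1))

/* Values other than PKVM_NOPAGE are my recollection, not gospel */
enum pkvm_page_state {
	PKVM_PAGE_OWNED			= 0,
	PKVM_PAGE_SHARED_OWNED		= BIT(0),
	PKVM_PAGE_SHARED_BORROWED	= BIT(1),
	/* Meta-state, not encoded in the PTE's SW bits */
	PKVM_NOPAGE			= BIT(0) | BIT(1),
};

int main(void)
{
	unsigned int comp = 0;	/* the vmemmap starts off zeroed */

	/* A zeroed field decodes to 0b00 ^ 0b11 == PKVM_NOPAGE */
	assert((comp ^ PKVM_PAGE_STATE_MASK) == PKVM_NOPAGE);

	/* set_hyp_state()/get_hyp_state() are inverses: (s ^ m) ^ m == s */
	comp = PKVM_PAGE_SHARED_BORROWED ^ PKVM_PAGE_STATE_MASK;
	assert((comp ^ PKVM_PAGE_STATE_MASK) == PKVM_PAGE_SHARED_BORROWED);

	return 0;
}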
> +
> /*
> * Refcounting for 'struct hyp_page'.
> * hyp_pool::lock must be held if atomic access to the refcount is required.
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index a45ffdec7612..3ab8c81500c2 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -642,24 +642,24 @@ static int __host_set_page_state_range(u64 addr, u64 size,
> return 0;
> }
>
> -static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
> +static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
> {
> - if (!kvm_pte_valid(pte))
> - return PKVM_NOPAGE;
> + phys_addr_t end = phys + size;
>
> - return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
> + for (; phys < end; phys += PAGE_SIZE)
> + set_hyp_state(phys, state);
> }
>
> -static int __hyp_check_page_state_range(u64 addr, u64 size,
> - enum pkvm_page_state state)
> +static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
> {
> - struct check_walk_data d = {
> - .desired = state,
> - .get_page_state = hyp_get_page_state,
> - };
> + phys_addr_t end = phys + size;
> +
> + for (; phys < end; phys += PAGE_SIZE) {
> + if (get_hyp_state(phys) != state)
> + return -EPERM;
> + }
>
> - hyp_assert_lock_held(&pkvm_pgd_lock);
> - return check_page_state_range(&pkvm_pgtable, addr, size, &d);
> + return 0;
> }
>
> static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
> @@ -687,7 +687,6 @@ int __pkvm_host_share_hyp(u64 pfn)
> {
> u64 phys = hyp_pfn_to_phys(pfn);
> void *virt = __hyp_va(phys);
> - enum kvm_pgtable_prot prot;
> u64 size = PAGE_SIZE;
> int ret;
>
> @@ -698,13 +697,13 @@ int __pkvm_host_share_hyp(u64 pfn)
> if (ret)
> goto unlock;
> if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
> - ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
> + ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
OK, I think it finally clicked for me. Does this mean that all the
tracking is now done in terms of PAs instead of VAs?
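If so, the state lookup is now nothing more than an index into the hyp
vmemmap, along the lines of the below (quoting nvhe/memory.h from
memory, so the exact macro definitions may be slightly off):

/* My recollection of the lookup path -- a plain array index, no S1 walk */
#define hyp_phys_to_pfn(phys)	((phys) >> PAGE_SHIFT)
#define hyp_phys_to_page(phys)	(&hyp_vmemmap[hyp_phys_to_pfn(phys)])

i.e. keyed by PA throughout, with the VA only mattering when an actual
mapping is created or torn down.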
> if (ret)
> goto unlock;
> }
>
> - prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
> - WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
> + __hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
> + WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));
And this is the split between the state, which is now kept on a PA
basis, and the actual mapping, which now only takes the page
attributes and no SW bits?
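To spell out what I mean, as I read it the old flow made the state
travel with the mapping by stuffing it into the PTE's SW bits:

	/* old: state encoded in the PTE's SW bits */
	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));

while the new flow records the state in the vmemmap, keyed by PA, and
the mapping itself is a plain PAGE_HYP one:

	/* new: state lives in hyp_vmemmap, mapping carries no SW bits */
	__hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));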
Thanks,
M.
--
Without deviation from the norm, progress is not possible.