Re: [PATCH v1 1/2] KVM: x86/mmu: Allow for overriding MMIO SPTE mask

From: Tom Lendacky
Date: Wed Dec 18 2019 - 14:51:30 EST


On 12/18/19 1:45 PM, Tom Lendacky wrote:
> The KVM MMIO support uses bit 51 as the reserved bit to cause nested page
> faults when a guest performs MMIO. The AMD memory encryption support uses
> CPUID functions to define the encryption bit position. Given this, KVM
> can't assume that bit 51 will be safe all the time.
>
> Add a callback that returns a mask of reserved bit(s) that can be used
> for the MMIO page table entries. The callback is not responsible for
> setting the present bit.
>
> If a callback is registered:
> - any non-zero mask returned is updated with the present bit and used
> as the MMIO SPTE mask.
> - a zero mask returned results in a mask with only bit 51 set (i.e. no
> present bit) as the MMIO SPTE mask, similar to the way 52-bit physical
> addressing is handled.
>
> If no callback is registered, the current method of setting the MMIO SPTE
> mask is used.
>
> Fixes: 28a1f3ac1d0c ("kvm: x86: Set highest physical address bits in non-present/reserved SPTEs")
> Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 4 ++-
> arch/x86/kvm/mmu/mmu.c | 54 +++++++++++++++++++++------------
> arch/x86/kvm/x86.c | 2 +-
> 3 files changed, 38 insertions(+), 22 deletions(-)

This patch has some extra churn because kvm_x86_ops isn't set yet when the
call to kvm_set_mmio_spte_mask() is made, so the ops pointer has to be
passed down through kvm_mmu_module_init(). If it's not a problem to move
setting kvm_x86_ops just a bit earlier in kvm_arch_init(), some of the
churn can be avoided.

Thanks,
Tom

>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b79cd6aa4075..0c666c10f1a2 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1233,6 +1233,8 @@ struct kvm_x86_ops {
>
> bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
> int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> +
> + u64 (*get_reserved_mask)(void);
> };
>
> struct kvm_arch_async_pf {
> @@ -1266,7 +1268,7 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
> return -ENOTSUPP;
> }
>
> -int kvm_mmu_module_init(void);
> +int kvm_mmu_module_init(struct kvm_x86_ops *ops);
> void kvm_mmu_module_exit(void);
>
> void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 6f92b40d798c..d419df7a4056 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -6227,30 +6227,44 @@ static void mmu_destroy_caches(void)
> kmem_cache_destroy(mmu_page_header_cache);
> }
>
> -static void kvm_set_mmio_spte_mask(void)
> +static void kvm_set_mmio_spte_mask(struct kvm_x86_ops *ops)
> {
> u64 mask;
>
> - /*
> - * Set the reserved bits and the present bit of an paging-structure
> - * entry to generate page fault with PFER.RSV = 1.
> - */
> + if (ops->get_reserved_mask) {
> + mask = ops->get_reserved_mask();
>
> - /*
> - * Mask the uppermost physical address bit, which would be reserved as
> - * long as the supported physical address width is less than 52.
> - */
> - mask = 1ull << 51;
> + /*
> + * If there are reserved bits available, add the present bit
> + * to the mask to generate a page fault with PFER.RSV = 1.
> + * If there are no reserved bits available, mask the uppermost
> + * physical address bit, but keep the present bit cleared.
> + */
> + if (mask)
> + mask |= 1ull;
> + else
> + mask = 1ull << 51;
> + } else {
> + /*
> + * Set the reserved bits and the present bit of a
> + * paging-structure entry to generate page fault with
> + * PFER.RSV = 1.
> + */
>
> - /* Set the present bit. */
> - mask |= 1ull;
> + /*
> + * Mask the uppermost physical address bit, which would be
> + * reserved as long as the supported physical address width
> + * is less than 52.
> + */
> + mask = 1ull << 51;
>
> - /*
> - * If reserved bit is not supported, clear the present bit to disable
> - * mmio page fault.
> - */
> - if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
> - mask &= ~1ull;
> + /*
> + * If reserved bit is not supported, don't set the present bit
> + * to disable mmio page fault.
> + */
> + if (!IS_ENABLED(CONFIG_X86_64) || shadow_phys_bits != 52)
> + mask |= 1ull;
> + }
>
> kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
> }
> @@ -6301,7 +6315,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
> return 0;
> }
>
> -int kvm_mmu_module_init(void)
> +int kvm_mmu_module_init(struct kvm_x86_ops *ops)
> {
> int ret = -ENOMEM;
>
> @@ -6320,7 +6334,7 @@ int kvm_mmu_module_init(void)
>
> kvm_mmu_reset_all_pte_masks();
>
> - kvm_set_mmio_spte_mask();
> + kvm_set_mmio_spte_mask(ops);
>
> pte_list_desc_cache = kmem_cache_create("pte_list_desc",
> sizeof(struct pte_list_desc),
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 3ed167e039e5..311da4ed423d 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7234,7 +7234,7 @@ int kvm_arch_init(void *opaque)
> goto out_free_x86_fpu_cache;
> }
>
> - r = kvm_mmu_module_init();
> + r = kvm_mmu_module_init(ops);
> if (r)
> goto out_free_percpu;
>
>