Re: [V4 4/8] KVM: selftests: handle encryption bits in page tables
From: Sean Christopherson
Date: Thu Oct 06 2022 - 13:34:30 EST
On Mon, Aug 29, 2022, Peter Gonda wrote:
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 53b9a509c1d5..de13be62d52d 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -1388,6 +1388,58 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
> }
> }
>
> +/*
> + * Mask off any special bits from raw GPA
> + *
> + * Input Args:
> + * vm - Virtual Machine
> + * gpa_raw - Raw VM physical address
> + *
> + * Output Args: None
> + *
> + * Return:
> + * GPA with special bits (e.g. shared/encrypted) masked off.
> + */
> +vm_paddr_t addr_raw2gpa(struct kvm_vm *vm, vm_paddr_t gpa_raw)
> +{
> + if (!vm->memcrypt.has_enc_bit)
> + return gpa_raw;
> +
> + return gpa_raw & ~(1ULL << vm->memcrypt.enc_bit);
1. The notion of stealing GPA bits to tag the page should not be tied to memory
encryption.
2. Assuming that the shared vs. private bit is active-high is wrong, e.g. TDX
has active-low behavior.
3. "raw" is not super untuitive.
4. addr_gpa2raw() should not exist. It assumes the "raw" address always wants
the encryption bit set.
5. enc_by_default is an SEV-centric flag that should not exist outside of x86.
I think the easiest and most familiar solution for #1 will be to borrow the
kernel's tag/untag terminology for handling virtual addresses that can be tagged
for ASAN, e.g. ARM's top-byte-ignore and x86's linear address masking (LAM).
So instead of addr_raw2gpa(), just do:
static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_vaddr_t gpa)
{
return gpa & ~vm->gpa_tag_mask;
}
That way zero-allocating the VM will Just Work.
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
> index 2e6e61bbe81b..b2df259ce706 100644
> --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
> +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
> @@ -118,7 +118,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
>
> /* If needed, create page map l4 table. */
> if (!vm->pgd_created) {
> - vm->pgd = vm_alloc_page_table(vm);
> + vm->pgd = addr_gpa2raw(vm, vm_alloc_page_table(vm));
Rather than add "struct vm_memcrypt", I think it makes sense to introduce
"struct kvm_vm_arch" and then the x86 implementation can add pte_me_mask, similar
to what KVM does for SPTEs. THen this code because
vm->pgd = vm_alloc_page_table(vm) | vm->arch.pte_me_mask;
That will Just Work for TDX, because its pte_me_mask will be '0', even though it
effectively has an encryption bit (active-low).
> vm->pgd_created = true;
> }
> }
> @@ -140,13 +140,15 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
> int target_level)
> {
> uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level);
> + uint64_t paddr_raw = addr_gpa2raw(vm, paddr);
>
> if (!(*pte & PTE_PRESENT_MASK)) {
> *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
> if (current_level == target_level)
> - *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
> + *pte |= PTE_LARGE_MASK | (paddr_raw & PHYSICAL_PAGE_MASK);
> else
> - *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
> + *pte |= addr_gpa2raw(vm, vm_alloc_page_table(vm)) & PHYSICAL_PAGE_MASK;
Prefer to write this as:
*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
*pte |= vm->arch.pte_me_mask;
so that selftests don't assume the encryption bit is stolen from the GPA.
> +
> } else {
> /*
> * Entry already present. Assert that the caller doesn't want
> @@ -184,6 +186,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
> "Physical address beyond maximum supported,\n"
> " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
> paddr, vm->max_gfn, vm->page_size);
> + TEST_ASSERT(addr_raw2gpa(vm, paddr) == paddr,
> + "Unexpected bits in paddr: %lx", paddr);
>
> /*
> * Allocate upper level page tables, if not already present. Return
> @@ -206,7 +210,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
> pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K);
> TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
> "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
> - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
> + *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK |
> + (addr_gpa2raw(vm, paddr) & PHYSICAL_PAGE_MASK);
And with the above suggestions, this can become something like:
if (vm_is_gpa_encrypted(vm, paddr))
*pte |= vm->arch.c_bit;
else
*pte |= vm->arch.s_bit;
where SEV sets c_bit and TDX sets s_bit.
> void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
> @@ -515,7 +520,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
> if (!(pte[index[0]] & PTE_PRESENT_MASK))
> goto unmapped_gva;
>
> - return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK);
> + return addr_raw2gpa(vm, PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK);
Aha! A use for my rework[*] of this mess! I don't think you need to take a
dependency on that work, at a glance the conflicts should be minor, i.e. doesn't
matter that much which lands first.
[*] https://lore.kernel.org/all/20221006004512.666529-1-seanjc@xxxxxxxxxx