Re: [PATCH 3/8] kvm/mmu: rename is_largepage_backed to mapping_level

From: Marcelo Tosatti
Date: Tue Jun 23 2009 - 12:50:54 EST


Hi Joerg,

On Fri, Jun 19, 2009 at 03:16:24PM +0200, Joerg Roedel wrote:
> With the new name and the corresponding backend changes this function
> can now support multiple hugepage sizes.
>
> Signed-off-by: Joerg Roedel <joerg.roedel@xxxxxxx>
> ---
> arch/x86/kvm/mmu.c | 100 +++++++++++++++++++++++++++++--------------
> arch/x86/kvm/paging_tmpl.h | 4 +-
> 2 files changed, 69 insertions(+), 35 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 1f24d88..3fa6009 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -390,37 +390,52 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
> * Return the pointer to the largepage write count for a given
> * gfn, handling slots that are not large page aligned.
> */
> -static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
> +static int *slot_largepage_idx(gfn_t gfn,
> + struct kvm_memory_slot *slot,
> + int level)
> {
> unsigned long idx;
>
> - idx = (gfn / KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL)) -
> - (slot->base_gfn / KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL));
> - return &slot->lpage_info[0][idx].write_count;
> + idx = (gfn / KVM_PAGES_PER_HPAGE(level)) -
> + (slot->base_gfn / KVM_PAGES_PER_HPAGE(level));
> + return &slot->lpage_info[level - 2][idx].write_count;
> }
>
> static void account_shadowed(struct kvm *kvm, gfn_t gfn)
> {
> + struct kvm_memory_slot *slot;
> int *write_count;
> + int i;
>
> gfn = unalias_gfn(kvm, gfn);
> - write_count = slot_largepage_idx(gfn,
> - gfn_to_memslot_unaliased(kvm, gfn));
> - *write_count += 1;
> +
> + for (i = PT_DIRECTORY_LEVEL;
> + i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
> + slot = gfn_to_memslot_unaliased(kvm, gfn);

Can't you move this call out of the loop?

> + write_count = slot_largepage_idx(gfn, slot, i);
> + *write_count += 1;
> + }
> }
>
> static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
> {
> + struct kvm_memory_slot *slot;
> int *write_count;
> + int i;
>
> gfn = unalias_gfn(kvm, gfn);
> - write_count = slot_largepage_idx(gfn,
> - gfn_to_memslot_unaliased(kvm, gfn));
> - *write_count -= 1;
> - WARN_ON(*write_count < 0);
> + for (i = PT_DIRECTORY_LEVEL;
> + i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
> + slot = gfn_to_memslot_unaliased(kvm, gfn);
> + write_count = slot_largepage_idx(gfn, slot, i);
> + *write_count -= 1;
> + WARN_ON(*write_count < 0);
> + }
> }
>
> -static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
> +static int has_wrprotected_page(struct kvm *kvm,
> + gfn_t gfn,
> + int level)
> {
> struct kvm_memory_slot *slot;
> int *largepage_idx;
> @@ -428,47 +443,67 @@ static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
> gfn = unalias_gfn(kvm, gfn);
> slot = gfn_to_memslot_unaliased(kvm, gfn);
> if (slot) {
> - largepage_idx = slot_largepage_idx(gfn, slot);
> + largepage_idx = slot_largepage_idx(gfn, slot, level);
> return *largepage_idx;
> }
>
> return 1;
> }
>
> -static int host_largepage_backed(struct kvm *kvm, gfn_t gfn)
> +static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
> {
> + unsigned long page_size = PAGE_SIZE;
> struct vm_area_struct *vma;
> unsigned long addr;
> - int ret = 0;
> + int i, ret = 0;
>
> addr = gfn_to_hva(kvm, gfn);
> if (kvm_is_error_hva(addr))
> - return ret;
> + return page_size;
>
> down_read(&current->mm->mmap_sem);
> vma = find_vma(current->mm, addr);
> - if (vma && is_vm_hugetlb_page(vma))
> - ret = 1;
> + if (!vma)
> + goto out;
> +
> + page_size = vma_kernel_pagesize(vma);
> +
> +out:
> up_read(&current->mm->mmap_sem);
>
> + for (i = PT_PAGE_TABLE_LEVEL;
> + i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
> + if (page_size >= KVM_HPAGE_SIZE(i))
> + ret = i;
> + else
> + break;
> + }
> +
> return ret;
> }
>
> -static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
> +static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
> {
> struct kvm_memory_slot *slot;
> -
> - if (has_wrprotected_page(vcpu->kvm, large_gfn))
> - return 0;
> -
> - if (!host_largepage_backed(vcpu->kvm, large_gfn))
> - return 0;
> + int host_level;
> + int level = PT_PAGE_TABLE_LEVEL;
>
> slot = gfn_to_memslot(vcpu->kvm, large_gfn);
> if (slot && slot->dirty_bitmap)
> - return 0;
> + return PT_PAGE_TABLE_LEVEL;
>
> - return 1;
> + host_level = host_mapping_level(vcpu->kvm, large_gfn);
> +
> + if (host_level == PT_PAGE_TABLE_LEVEL)
> + return host_level;
> +
> + for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) {
> +
> + if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
> + break;
> + }
> +
> + return level - 1;
> }
>
> /*
> @@ -1704,7 +1739,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> if ((pte_access & ACC_WRITE_MASK)
> || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
>
> - if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) {
> + if (largepage && has_wrprotected_page(vcpu->kvm, gfn, 1)) {

It seems direct_map is missing the large pte overwrite check that
fetch() contains:

if (is_large_pte(*sptep)) {
rmap_remove(vcpu->kvm, sptep);
__set_spte(sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
}

(perhaps it's not a possible scenario at the moment, but...).


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/