Re: [PATCH 5/6] KVM: MMU: prefetch ptes when intercepted guest #PF

From: Avi Kivity
Date: Tue Jun 15 2010 - 07:41:21 EST

Next message: Uwe Kleine-König: "Re: [PATCH] tags: put function prototypes back!"
Previous message: Jean Delvare: "Re: [PATCH 6/8]i2c:i2c_core Fix warning: variable 'dummy' set but not used"
In reply to: Xiao Guangrong: "[PATCH 5/6] KVM: MMU: prefetch ptes when intercepted guest #PF"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On 06/15/2010 05:47 AM, Xiao Guangrong wrote:

Support prefetching ptes when intercepting a guest #PF, to avoid further
#PFs on later accesses

If we meet any failure in the prefetch path, we exit it and do not
try the other ptes, to avoid turning it into a heavy path

+#define PTE_PREFETCH_NUM 16
+
#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

@@ -2041,6 +2043,39 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}

+static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
+{
+ struct kvm_mmu_page *sp;
+ int index, i;
+
+ sp = page_header(__pa(sptep));
+ WARN_ON(!sp->role.direct);
+ index = sptep - sp->spt;
+
+ for (i = index + 1; i< min(PT64_ENT_PER_PAGE,
+ index + PTE_PREFETCH_NUM); i++) {
+ gfn_t gfn;
+ pfn_t pfn;
+ u64 *spte = sp->spt + i;
+
+ if (*spte != shadow_trap_nonpresent_pte)
+ continue;
+
+ gfn = sp->gfn + (i<< ((sp->role.level - 1) * PT64_LEVEL_BITS));

Can calculate outside the loop and use +=.

Can this in fact work for level != PT_PAGE_TABLE_LEVEL? We might start at PT_PAGE_DIRECTORY_LEVEL but get 4k pages while iterating.

+
+ pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
+ if (is_error_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
+ break;
+ }
+ if (pte_prefetch_topup_memory_cache(vcpu))
+ break;
+
+ mmu_set_spte(vcpu, spte, ACC_ALL, ACC_ALL, 0, 0, 1, NULL,
+ sp->role.level, gfn, pfn, true, false);
+ }
+}

Nice. Direct prefetch should usually succeed.

We can later augment this to call get_user_pages_fast(..., PTE_PREFETCH_NUM, ...) to reduce gup overhead.

+static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, u64 *sptep)
+{
+ struct kvm_mmu_page *sp;
+ pt_element_t *table = NULL;
+ int offset = 0, shift, index, i;
+
+ sp = page_header(__pa(sptep));
+ index = sptep - sp->spt;
+
+ if (PTTYPE == 32) {
+ shift = PAGE_SHIFT - (PT_LEVEL_BITS -
+ PT64_LEVEL_BITS) * sp->role.level;
+ offset = sp->role.quadrant<< shift;
+ }
+
+ for (i = index + 1; i< min(PT64_ENT_PER_PAGE,
+ index + PTE_PREFETCH_NUM); i++) {
+ struct page *page;
+ pt_element_t gpte;
+ unsigned pte_access;
+ u64 *spte = sp->spt + i;
+ gfn_t gfn;
+ pfn_t pfn;
+ int dirty;
+
+ if (*spte != shadow_trap_nonpresent_pte)
+ continue;
+
+ pte_access = sp->role.access;
+ if (sp->role.direct) {
+ dirty = 1;
+ gfn = sp->gfn + (i<< ((sp->role.level - 1) *
+ PT64_LEVEL_BITS));
+ goto gfn_mapping;
+ }

Should just call direct_pte_prefetch.

+
+ if (!table) {
+ page = gfn_to_page_atomic(vcpu->kvm, sp->gfn);
+ if (is_error_page(page)) {
+ kvm_release_page_clean(page);
+ break;
+ }
+ table = kmap_atomic(page, KM_USER0);
+ table = (pt_element_t *)((char *)table + offset);
+ }

Why not kvm_read_guest_atomic()? Can do it outside the loop.

+
+ gpte = table[i];
+ if (!(gpte& PT_ACCESSED_MASK))
+ continue;
+
+ if (!is_present_gpte(gpte)) {
+ if (!sp->unsync)
+ *spte = shadow_notrap_nonpresent_pte;

Need __set_spte().

+ continue;
+ }
+ dirty = is_dirty_gpte(gpte);
+ gfn = (gpte& PT64_BASE_ADDR_MASK)>> PAGE_SHIFT;
+ pte_access = pte_access& FNAME(gpte_access)(vcpu, gpte);
+gfn_mapping:
+ pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
+ if (is_error_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
+ break;
+ }
+
+ if (pte_prefetch_topup_memory_cache(vcpu))
+ break;
+ mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
+ dirty, NULL, sp->role.level, gfn, pfn,
+ true, false);
+ }
+ if (table)
+ kunmap_atomic((char *)table - offset, KM_USER0);
+}

I think a lot of code can be shared with the pte prefetch in invlpg.

+
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
*/
@@ -322,6 +397,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
is_dirty_gpte(gw->ptes[gw->level-1]),
ptwrite, level,
gw->gfn, pfn, false, true);
+ FNAME(pte_prefetch)(vcpu, sptep);
break;
}

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Uwe Kleine-König: "Re: [PATCH] tags: put function prototypes back!"
Previous message: Jean Delvare: "Re: [PATCH 6/8]i2c:i2c_core Fix warning: variable 'dummy' set but not used"
In reply to: Xiao Guangrong: "[PATCH 5/6] KVM: MMU: prefetch ptes when intercepted guest #PF"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]