[PATCH 27 of 66] kvm mmu transparent hugepage support

From: Andrea Arcangeli
Date: Wed Nov 03 2010 - 11:34:21 EST


From: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

Teach the KVM MMU to back guest memory with large (2M) mappings when the host
memory behind a gfn is a transparent hugepage, not only when it comes from
hugetlbfs.

When kvm_host_page_size() reports only PAGE_SIZE (i.e. there is no hugetlbfs
vma), host_mapping_level() now looks up the backing page and, if it is part of
a transparent hugepage (PageTransCompound()), allows a KVM_HPAGE_SIZE(2)
mapping. The mmu_notifier_seq sampling in the page fault paths is moved before
mapping_level(), so that a hugepage split after the mapping level has been
chosen is caught by the existing mmu_notifier_retry() recheck under mmu_lock.
kvm_is_mmio_pfn() is made safe against a transparent hugepage being split
while it dereferences compound_head(), and kvm_host_page_size()/hva_to_pfn()
are extended/exported to support the new check.

This should work for both hugetlbfs and transparent hugepages.
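
To make the reordering explicit, here is a condensed sketch of the fault-path
shape this relies on (modelled on the existing nonpaging_map(); declarations
and error handling are omitted, and mmu_notifier_retry(),
kvm_mmu_free_some_pages() and __direct_map() are the pre-existing mmu.c code,
not something added by this patch):

        mmu_seq = vcpu->kvm->mmu_notifier_seq;  /* sample the sequence first ... */
        smp_rmb();                              /* ... before reading host page state */

        level = mapping_level(vcpu, gfn);       /* may inspect a transparent hugepage */
        gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);

        pfn = gfn_to_pfn(vcpu->kvm, gfn);

        spin_lock(&vcpu->kvm->mmu_lock);
        if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
                goto out_unlock;                /* hugepage split or unmapped meanwhile: retry */
        kvm_mmu_free_some_pages(vcpu);
        r = __direct_map(vcpu, v, write, level, gfn, pfn);
        spin_unlock(&vcpu->kvm->mmu_lock);

If the transparent hugepage backing the gfn is split after mapping_level()
has picked a 2M level, the sequence number changes and the fault is simply
retried instead of installing a stale large mapping.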

Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>
Acked-by: Rik van Riel <riel@xxxxxxxxxx>
---

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -543,10 +543,20 @@ static int has_wrprotected_page(struct k

static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
{
- unsigned long page_size;
+ unsigned long page_size, addr;
int i, ret = 0;

- page_size = kvm_host_page_size(kvm, gfn);
+ page_size = kvm_host_page_size(kvm, gfn, &addr);
+
+ /* check for transparent hugepages */
+ if (page_size == PAGE_SIZE && !kvm_is_error_hva(addr)) {
+ pfn_t pfn = hva_to_pfn(kvm, addr, 0);
+
+ if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
+ PageTransCompound(pfn_to_page(pfn)))
+ page_size = KVM_HPAGE_SIZE(2);
+ kvm_release_pfn_clean(pfn);
+ }

for (i = PT_PAGE_TABLE_LEVEL;
i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
@@ -2283,6 +2293,8 @@ static int nonpaging_map(struct kvm_vcpu
pfn_t pfn;
unsigned long mmu_seq;

+ mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ smp_rmb();
level = mapping_level(vcpu, gfn);

/*
@@ -2294,8 +2306,6 @@ static int nonpaging_map(struct kvm_vcpu

gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);

- mmu_seq = vcpu->kvm->mmu_notifier_seq;
- smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, gfn);

/* mmio */
@@ -2601,12 +2611,12 @@ static int tdp_page_fault(struct kvm_vcp
if (r)
return r;

- level = mapping_level(vcpu, gfn);
-
- gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
-
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
+ level = mapping_level(vcpu, gfn);
+
+ gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+
pfn = gfn_to_pfn(vcpu->kvm, gfn);
if (is_error_pfn(pfn))
return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -561,13 +561,13 @@ static int FNAME(page_fault)(struct kvm_
return 0;
}

+ mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ smp_rmb();
if (walker.level >= PT_DIRECTORY_LEVEL) {
level = min(walker.level, mapping_level(vcpu, walker.gfn));
walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
}

- mmu_seq = vcpu->kvm->mmu_notifier_seq;
- smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

/* mmio */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -301,6 +301,7 @@ void kvm_set_page_dirty(struct page *pag
void kvm_set_page_accessed(struct page *page);

pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
+pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic);
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
@@ -325,7 +326,8 @@ int kvm_clear_guest_page(struct kvm *kvm
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn,
+ unsigned long *addr);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);

void kvm_vcpu_block(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -83,7 +83,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
}

/* Get the page size we could use to map */
- page_size = kvm_host_page_size(kvm, gfn);
+ page_size = kvm_host_page_size(kvm, gfn, NULL);

/* Make sure the page_size does not exceed the memslot */
while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,8 +102,36 @@ static pfn_t fault_pfn;
inline int kvm_is_mmio_pfn(pfn_t pfn)
{
if (pfn_valid(pfn)) {
- struct page *page = compound_head(pfn_to_page(pfn));
- return PageReserved(page);
+ struct page *head;
+ struct page *tail = pfn_to_page(pfn);
+ head = compound_head(tail);
+ if (head != tail) {
+ smp_rmb();
+ /*
+ * head may be a dangling pointer.
+ * __split_huge_page_refcount clears PageTail
+ * before overwriting first_page, so if
+ * PageTail is still there it means the head
+ * pointer isn't dangling.
+ */
+ if (PageTail(tail)) {
+ /*
+ * the "head" is not a dangling
+ * pointer but the hugepage may have
+ * been split from under us (and we
+ * may not hold a reference count on
+ * the head page so it can be reused
+ * before we run PageReserved), so
+ * we have to recheck PageTail before
+ * returning what we just read.
+ */
+ int reserved = PageReserved(head);
+ smp_rmb();
+ if (PageTail(tail))
+ return reserved;
+ }
+ }
+ return PageReserved(tail);
}

return true;
@@ -884,7 +912,8 @@ int kvm_is_visible_gfn(struct kvm *kvm,
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);

-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn,
+ unsigned long *addrp)
{
struct vm_area_struct *vma;
unsigned long addr, size;
@@ -892,6 +921,8 @@ unsigned long kvm_host_page_size(struct
size = PAGE_SIZE;

addr = gfn_to_hva(kvm, gfn);
+ if (addrp)
+ *addrp = addr;
if (kvm_is_error_hva(addr))
return PAGE_SIZE;

@@ -946,7 +977,7 @@ unsigned long gfn_to_hva(struct kvm *kvm
}
EXPORT_SYMBOL_GPL(gfn_to_hva);

-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
+pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
{
struct page *page[1];
int npages;