[PATCH 20 of 66] pte alloc trans splitting

From: Andrea Arcangeli
Date: Wed Nov 03 2010 - 11:43:02 EST


From: Andrea Arcangeli <aarcange@xxxxxxxxxx>

pte alloc routines must wait for split_huge_page if the pmd is not
present and not null (i.e. pmd_trans_splitting). The additional
branches are optimized away at compile time by pmd_trans_splitting if
the config option is off. However we must pass the vma down in order
to know the anon_vma lock to wait for.

Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Acked-by: Rik van Riel <riel@xxxxxxxxxx>
Acked-by: Mel Gorman <mel@xxxxxxxxx>
---

diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -133,7 +133,7 @@ static int map_tboot_page(unsigned long
pmd = pmd_alloc(&tboot_mm, pud, vaddr);
if (!pmd)
return -1;
- pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
+ pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
if (!pte)
return -1;
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
diff --git a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1117,7 +1117,8 @@ static inline int __pmd_alloc(struct mm_
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
#endif

-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long address);
int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);

/*
@@ -1186,16 +1187,18 @@ static inline void pgtable_page_dtor(str
pte_unmap(pte); \
} while (0)

-#define pte_alloc_map(mm, pmd, address) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
- NULL: pte_offset_map(pmd, address))
+#define pte_alloc_map(mm, vma, pmd, address) \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma, \
+ pmd, address))? \
+ NULL: pte_offset_map(pmd, address))

#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL, \
+ pmd, address))? \
NULL: pte_offset_map_lock(mm, pmd, address, ptlp))

#define pte_alloc_kernel(pmd, address) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
NULL: pte_offset_kernel(pmd, address))

extern void free_area_init(unsigned long * zones_size);
diff --git a/mm/memory.c b/mm/memory.c
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -394,9 +394,11 @@ void free_pgtables(struct mmu_gather *tl
}
}

-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long address)
{
pgtable_t new = pte_alloc_one(mm, address);
+ int wait_split_huge_page;
if (!new)
return -ENOMEM;

@@ -416,14 +418,18 @@ int __pte_alloc(struct mm_struct *mm, pm
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */

spin_lock(&mm->page_table_lock);
- if (!pmd_present(*pmd)) { /* Has another populated it ? */
+ wait_split_huge_page = 0;
+ if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
mm->nr_ptes++;
pmd_populate(mm, pmd, new);
new = NULL;
- }
+ } else if (unlikely(pmd_trans_splitting(*pmd)))
+ wait_split_huge_page = 1;
spin_unlock(&mm->page_table_lock);
if (new)
pte_free(mm, new);
+ if (wait_split_huge_page)
+ wait_split_huge_page(vma->anon_vma, pmd);
return 0;
}

@@ -436,10 +442,11 @@ int __pte_alloc_kernel(pmd_t *pmd, unsig
smp_wmb(); /* See comment in __pte_alloc */

spin_lock(&init_mm.page_table_lock);
- if (!pmd_present(*pmd)) { /* Has another populated it ? */
+ if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
pmd_populate_kernel(&init_mm, pmd, new);
new = NULL;
- }
+ } else
+ VM_BUG_ON(pmd_trans_splitting(*pmd));
spin_unlock(&init_mm.page_table_lock);
if (new)
pte_free_kernel(&init_mm, new);
@@ -3215,7 +3222,7 @@ int handle_mm_fault(struct mm_struct *mm
pmd = pmd_alloc(mm, pud, address);
if (!pmd)
return VM_FAULT_OOM;
- pte = pte_alloc_map(mm, pmd, address);
+ pte = pte_alloc_map(mm, vma, pmd, address);
if (!pte)
return VM_FAULT_OOM;

diff --git a/mm/mremap.c b/mm/mremap.c
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -47,7 +47,8 @@ static pmd_t *get_old_pmd(struct mm_stru
return pmd;
}

-static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
+static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
@@ -62,7 +63,8 @@ static pmd_t *alloc_new_pmd(struct mm_st
if (!pmd)
return NULL;

- if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
+ VM_BUG_ON(pmd_trans_huge(*pmd));
+ if (pmd_none(*pmd) && __pte_alloc(mm, vma, pmd, addr))
return NULL;

return pmd;
@@ -147,7 +149,7 @@ unsigned long move_page_tables(struct vm
old_pmd = get_old_pmd(vma->vm_mm, old_addr);
if (!old_pmd)
continue;
- new_pmd = alloc_new_pmd(vma->vm_mm, new_addr);
+ new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
if (!new_pmd)
break;
next = (new_addr + PMD_SIZE) & PMD_MASK;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/