Re: [patch 2/4] mmu_notifier: Callbacks to invalidate addressranges
From: Robin Holt
Date: Fri Feb 01 2008 - 17:10:32 EST
Christoph,
The following code in do_wp_page is a problem.
We are getting this callout when we transition the pte from a read-only
to read-write. Jack and I can not see a reason we would need that
callout. It is causing problems for xpmem in that a write fault goes
to get_user_pages which gets back to do_wp_page that does the callout.
XPMEM only allows either faulting or invalidating to occur for an mm.
As you can see, the case above needs it to be in both states.
Thanks,
Robin
> @@ -1630,6 +1646,8 @@ gotten:
> goto oom;
> cow_user_page(new_page, old_page, address, vma);
>
> + mmu_notifier(invalidate_range_begin, mm, address,
> + address + PAGE_SIZE, 0);
> /*
> * Re-check the pte - we dropped the lock
> */
> @@ -1668,6 +1686,8 @@ gotten:
> page_cache_release(old_page);
> unlock:
> pte_unmap_unlock(page_table, ptl);
> + mmu_notifier(invalidate_range_end, mm,
> + address, address + PAGE_SIZE, 0);
> if (dirty_page) {
> if (vma->vm_file)
> file_update_time(vma->vm_file);
> Index: linux-2.6/mm/mmap.c
> ===================================================================
> --- linux-2.6.orig/mm/mmap.c 2008-01-31 20:58:05.000000000 -0800
> +++ linux-2.6/mm/mmap.c 2008-01-31 20:59:14.000000000 -0800
> @@ -1744,11 +1744,13 @@ static void unmap_region(struct mm_struc
> lru_add_drain();
> tlb = tlb_gather_mmu(mm, 0);
> update_hiwater_rss(mm);
> + mmu_notifier(invalidate_range_begin, mm, start, end, 0);
> unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
> vm_unacct_memory(nr_accounted);
> free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
> next? next->vm_start: 0);
> tlb_finish_mmu(tlb, start, end);
> + mmu_notifier(invalidate_range_end, mm, start, end, 0);
> }
>
> /*
> Index: linux-2.6/mm/hugetlb.c
> ===================================================================
> --- linux-2.6.orig/mm/hugetlb.c 2008-01-31 20:56:03.000000000 -0800
> +++ linux-2.6/mm/hugetlb.c 2008-01-31 20:59:14.000000000 -0800
> @@ -14,6 +14,7 @@
> #include <linux/mempolicy.h>
> #include <linux/cpuset.h>
> #include <linux/mutex.h>
> +#include <linux/mmu_notifier.h>
>
> #include <asm/page.h>
> #include <asm/pgtable.h>
> @@ -743,6 +744,7 @@ void __unmap_hugepage_range(struct vm_ar
> BUG_ON(start & ~HPAGE_MASK);
> BUG_ON(end & ~HPAGE_MASK);
>
> + mmu_notifier(invalidate_range_begin, mm, start, end, 1);
> spin_lock(&mm->page_table_lock);
> for (address = start; address < end; address += HPAGE_SIZE) {
> ptep = huge_pte_offset(mm, address);
> @@ -763,6 +765,7 @@ void __unmap_hugepage_range(struct vm_ar
> }
> spin_unlock(&mm->page_table_lock);
> flush_tlb_range(vma, start, end);
> + mmu_notifier(invalidate_range_end, mm, start, end, 1);
> list_for_each_entry_safe(page, tmp, &page_list, lru) {
> list_del(&page->lru);
> put_page(page);
> Index: linux-2.6/mm/filemap_xip.c
> ===================================================================
> --- linux-2.6.orig/mm/filemap_xip.c 2008-01-31 20:56:03.000000000 -0800
> +++ linux-2.6/mm/filemap_xip.c 2008-01-31 20:59:14.000000000 -0800
> @@ -13,6 +13,7 @@
> #include <linux/module.h>
> #include <linux/uio.h>
> #include <linux/rmap.h>
> +#include <linux/mmu_notifier.h>
> #include <linux/sched.h>
> #include <asm/tlbflush.h>
>
> @@ -189,6 +190,8 @@ __xip_unmap (struct address_space * mapp
> address = vma->vm_start +
> ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
> BUG_ON(address < vma->vm_start || address >= vma->vm_end);
> + mmu_notifier(invalidate_range_begin, mm, address,
> + address + PAGE_SIZE, 1);
> pte = page_check_address(page, mm, address, &ptl);
> if (pte) {
> /* Nuke the page table entry. */
> @@ -200,6 +203,8 @@ __xip_unmap (struct address_space * mapp
> pte_unmap_unlock(pte, ptl);
> page_cache_release(page);
> }
> + mmu_notifier(invalidate_range_end, mm,
> + address, address + PAGE_SIZE, 1);
> }
> spin_unlock(&mapping->i_mmap_lock);
> }
> Index: linux-2.6/mm/mremap.c
> ===================================================================
> --- linux-2.6.orig/mm/mremap.c 2008-01-31 20:56:03.000000000 -0800
> +++ linux-2.6/mm/mremap.c 2008-01-31 20:59:14.000000000 -0800
> @@ -18,6 +18,7 @@
> #include <linux/highmem.h>
> #include <linux/security.h>
> #include <linux/syscalls.h>
> +#include <linux/mmu_notifier.h>
>
> #include <asm/uaccess.h>
> #include <asm/cacheflush.h>
> @@ -124,12 +125,15 @@ unsigned long move_page_tables(struct vm
> unsigned long old_addr, struct vm_area_struct *new_vma,
> unsigned long new_addr, unsigned long len)
> {
> - unsigned long extent, next, old_end;
> + unsigned long extent, next, old_start, old_end;
> pmd_t *old_pmd, *new_pmd;
>
> + old_start = old_addr;
> old_end = old_addr + len;
> flush_cache_range(vma, old_addr, old_end);
>
> + mmu_notifier(invalidate_range_begin, vma->vm_mm,
> + old_addr, old_end, 0);
> for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
> cond_resched();
> next = (old_addr + PMD_SIZE) & PMD_MASK;
> @@ -150,6 +154,7 @@ unsigned long move_page_tables(struct vm
> move_ptes(vma, old_pmd, old_addr, old_addr + extent,
> new_vma, new_pmd, new_addr);
> }
> + mmu_notifier(invalidate_range_end, vma->vm_mm, old_start, old_end, 0);
>
> return len + old_addr - old_end; /* how much done */
> }
>
> --
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@xxxxxxxxxx For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/