[patch 2/3] mmu_notifier: Callbacks to invalidate address ranges

From: Christoph Lameter
Date: Wed Jan 30 2008 - 23:58:55 EST


The address ranges of an mm_struct need to be invalidated when pages are
removed or their permissions or other attributes change.

invalidate_range_begin/end() is frequently called with only mmap_sem
held. If invalidate_range_begin() is called while spinlocks are held,
we pass an atomic flag to invalidate_range_begin/end() to indicate
that sleeping is not possible.
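
For illustration only, here is a minimal sketch of what a driver for a
secondary MMU might plug into these hooks. It assumes the callback
signatures from patch 1/3 of this series (a struct mmu_notifier pointer
followed by the arguments used at the call sites below); struct my_mmu,
its counter and the ops table are hypothetical and not part of this patch:

#include <linux/kernel.h>
#include <linux/mmu_notifier.h>
#include <asm/atomic.h>

/* Hypothetical per-driver state embedding the notifier. */
struct my_mmu {
	struct mmu_notifier notifier;
	atomic_t invalidate_count;	/* > 0: hold off new references */
};

static void my_invalidate_range_begin(struct mmu_notifier *mn,
		struct mm_struct *mm,
		unsigned long start, unsigned long end, int atomic)
{
	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);

	/*
	 * Drop secondary-MMU references to [start, end) and stop handing
	 * out new ones. If "atomic" is set the caller may hold spinlocks,
	 * so nothing here is allowed to sleep.
	 */
	atomic_inc(&m->invalidate_count);
}

static void my_invalidate_range_end(struct mmu_notifier *mn,
		struct mm_struct *mm, int atomic)
{
	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);

	/* The range is consistent again; new references are allowed. */
	atomic_dec(&m->invalidate_count);
}

static const struct mmu_notifier_ops my_mmu_ops = {
	.invalidate_range_begin	= my_invalidate_range_begin,
	.invalidate_range_end	= my_invalidate_range_end,
};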

In two cases we use invalidate_range_begin/end() to invalidate single
pages, because the pair allows holding off new references
(idea by Robin Holt). A sketch of the pattern follows the two cases below.

do_wp_page(): We hold off new references while updating the pte.

__xip_unmap(): We are not taking the page lock, so we cannot use the
invalidate_page() mmu_rmap_notifier. invalidate_range_begin/end()
stands in instead.
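
For instance, a hypothetical helper (not part of this patch; the real code
is the do_wp_page() hunk below, and the usual mm/tlb includes are assumed)
that rewrites one pte while the pair holds off new references might look
like this. Both notifier calls are made without spinlocks held, hence the
atomic flag is 0:

static void example_set_pte(struct mm_struct *mm, struct vm_area_struct *vma,
			    pmd_t *pmd, unsigned long address, pte_t entry)
{
	spinlock_t *ptl;
	pte_t *ptep;

	/* Hold off new secondary-MMU references to this page. */
	mmu_notifier(invalidate_range_begin, mm, address,
					address + PAGE_SIZE - 1, 0);

	/* Update the pte under the page table lock. */
	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	flush_cache_page(vma, address, pte_pfn(*ptep));
	ptep_clear_flush(vma, address, ptep);
	set_pte_at(mm, address, ptep, entry);
	update_mmu_cache(vma, address, entry);
	pte_unmap_unlock(ptep, ptl);

	/* New references may be established again. */
	mmu_notifier(invalidate_range_end, mm, 0);
}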

Comments state that mmap_sem must be held for remap_pfn_range(),
but various drivers do not seem to do this.

Signed-off-by: Andrea Arcangeli <andrea@xxxxxxxxxxxx>
Signed-off-by: Robin Holt <holt@xxxxxxx>
Signed-off-by: Christoph Lameter <clameter@xxxxxxx>

---
mm/filemap_xip.c |    4 ++++
mm/fremap.c      |    3 +++
mm/hugetlb.c     |    3 +++
mm/memory.c      |   15 +++++++++++++--
mm/mmap.c        |    2 ++
5 files changed, 25 insertions(+), 2 deletions(-)

Index: linux-2.6/mm/fremap.c
===================================================================
--- linux-2.6.orig/mm/fremap.c 2008-01-30 20:03:05.000000000 -0800
+++ linux-2.6/mm/fremap.c 2008-01-30 20:05:39.000000000 -0800
@@ -15,6 +15,7 @@
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/mmu_notifier.h>

#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
@@ -211,7 +212,9 @@ asmlinkage long sys_remap_file_pages(uns
spin_unlock(&mapping->i_mmap_lock);
}

+ mmu_notifier(invalidate_range_begin, mm, start, start + size, 0);
err = populate_range(mm, vma, start, size, pgoff);
+ mmu_notifier(invalidate_range_end, mm, 0);
if (!err && !(flags & MAP_NONBLOCK)) {
if (unlikely(has_write_lock)) {
downgrade_write(&mm->mmap_sem);
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c 2008-01-30 20:03:05.000000000 -0800
+++ linux-2.6/mm/memory.c 2008-01-30 20:07:27.000000000 -0800
@@ -50,6 +50,7 @@
#include <linux/delayacct.h>
#include <linux/init.h>
#include <linux/writeback.h>
+#include <linux/mmu_notifier.h>

#include <asm/pgalloc.h>
#include <asm/uaccess.h>
@@ -883,13 +884,16 @@ unsigned long zap_page_range(struct vm_a
struct mmu_gather *tlb;
unsigned long end = address + size;
unsigned long nr_accounted = 0;
+ int atomic = details ? (details->i_mmap_lock != 0) : 0;

lru_add_drain();
tlb = tlb_gather_mmu(mm, 0);
update_hiwater_rss(mm);
+ mmu_notifier(invalidate_range_begin, mm, address, end, atomic);
end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
if (tlb)
tlb_finish_mmu(tlb, address, end);
+ mmu_notifier(invalidate_range_end, mm, atomic);
return end;
}

@@ -1318,7 +1322,7 @@ int remap_pfn_range(struct vm_area_struc
{
pgd_t *pgd;
unsigned long next;
- unsigned long end = addr + PAGE_ALIGN(size);
+ unsigned long start = addr, end = addr + PAGE_ALIGN(size);
struct mm_struct *mm = vma->vm_mm;
int err;

@@ -1352,6 +1356,7 @@ int remap_pfn_range(struct vm_area_struc
pfn -= addr >> PAGE_SHIFT;
pgd = pgd_offset(mm, addr);
flush_cache_range(vma, addr, end);
+ mmu_notifier(invalidate_range_begin, mm, start, end, 0);
do {
next = pgd_addr_end(addr, end);
err = remap_pud_range(mm, pgd, addr, next,
@@ -1359,6 +1364,7 @@ int remap_pfn_range(struct vm_area_struc
if (err)
break;
} while (pgd++, addr = next, addr != end);
+ mmu_notifier(invalidate_range_end, mm, 0);
return err;
}
EXPORT_SYMBOL(remap_pfn_range);
@@ -1442,10 +1448,11 @@ int apply_to_page_range(struct mm_struct
{
pgd_t *pgd;
unsigned long next;
- unsigned long end = addr + size;
+ unsigned long start = addr, end = addr + size;
int err;

BUG_ON(addr >= end);
+ mmu_notifier(invalidate_range_begin, mm, start, end, 0);
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
@@ -1453,6 +1460,7 @@ int apply_to_page_range(struct mm_struct
if (err)
break;
} while (pgd++, addr = next, addr != end);
+ mmu_notifier(invalidate_range_end, mm, 0);
return err;
}
EXPORT_SYMBOL_GPL(apply_to_page_range);
@@ -1630,6 +1638,8 @@ gotten:
goto oom;
cow_user_page(new_page, old_page, address, vma);

+ mmu_notifier(invalidate_range_begin, mm, address,
+ address + PAGE_SIZE - 1, 0);
/*
* Re-check the pte - we dropped the lock
*/
@@ -1668,6 +1678,7 @@ gotten:
page_cache_release(old_page);
unlock:
pte_unmap_unlock(page_table, ptl);
+ mmu_notifier(invalidate_range_end, mm, 0);
if (dirty_page) {
if (vma->vm_file)
file_update_time(vma->vm_file);
Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c 2008-01-30 20:03:05.000000000 -0800
+++ linux-2.6/mm/mmap.c 2008-01-30 20:05:39.000000000 -0800
@@ -1744,11 +1744,13 @@ static void unmap_region(struct mm_struc
lru_add_drain();
tlb = tlb_gather_mmu(mm, 0);
update_hiwater_rss(mm);
+ mmu_notifier(invalidate_range_begin, mm, start, end, 0);
unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
next? next->vm_start: 0);
tlb_finish_mmu(tlb, start, end);
+ mmu_notifier(invalidate_range_end, mm, 0);
}

/*
Index: linux-2.6/mm/hugetlb.c
===================================================================
--- linux-2.6.orig/mm/hugetlb.c 2008-01-30 20:03:05.000000000 -0800
+++ linux-2.6/mm/hugetlb.c 2008-01-30 20:05:39.000000000 -0800
@@ -14,6 +14,7 @@
#include <linux/mempolicy.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>
+#include <linux/mmu_notifier.h>

#include <asm/page.h>
#include <asm/pgtable.h>
@@ -743,6 +744,7 @@ void __unmap_hugepage_range(struct vm_ar
BUG_ON(start & ~HPAGE_MASK);
BUG_ON(end & ~HPAGE_MASK);

+ mmu_notifier(invalidate_range_begin, mm, start, end, 1);
spin_lock(&mm->page_table_lock);
for (address = start; address < end; address += HPAGE_SIZE) {
ptep = huge_pte_offset(mm, address);
@@ -763,6 +765,7 @@ void __unmap_hugepage_range(struct vm_ar
}
spin_unlock(&mm->page_table_lock);
flush_tlb_range(vma, start, end);
+ mmu_notifier(invalidate_range_end, mm, 1);
list_for_each_entry_safe(page, tmp, &page_list, lru) {
list_del(&page->lru);
put_page(page);
Index: linux-2.6/mm/filemap_xip.c
===================================================================
--- linux-2.6.orig/mm/filemap_xip.c 2008-01-30 20:03:05.000000000 -0800
+++ linux-2.6/mm/filemap_xip.c 2008-01-30 20:05:39.000000000 -0800
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/uio.h>
#include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <asm/tlbflush.h>

@@ -189,6 +190,8 @@ __xip_unmap (struct address_space * mapp
address = vma->vm_start +
((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+ mmu_notifier(invalidate_range_begin, mm, address,
+ address + PAGE_SIZE - 1, 1);
pte = page_check_address(page, mm, address, &ptl);
if (pte) {
/* Nuke the page table entry. */
@@ -200,6 +203,7 @@ __xip_unmap (struct address_space * mapp
pte_unmap_unlock(pte, ptl);
page_cache_release(page);
}
+ mmu_notifier(invalidate_range_end, mm, 1);
}
spin_unlock(&mapping->i_mmap_lock);
}
