Re: RFC: Early freeing of page tables - 2.2.5 patch

Jakub Jelinek (jj@sunsite.ms.mff.cuni.cz)
Fri, 23 Apr 1999 17:01:00 +0200 (CEST)


> > What do you think about this?
>
> Looks sensible. It's far cheaper to detect whether a page
> table/directory is empty by looking at the surrounding vmas than by
> examining the directory tree.
>
> > The patch is against 2.2.5; I'll update it to 2.2.6 soon, since it
> > won't apply cleanly to 2.2.6 as is.
>
> OK, I'll give it a spin on SMP Intel if you want once you have a 2.2.6
> version.

Thanks. Below is the 2.2.6 version, which should apply cleanly to
2.2.7-1 as well.
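
To make the test concrete: whether a page table page has become unused
is decided by plain arithmetic against the two neighbouring vmas, with
no walk over the table itself. Here is a userspace sketch of the
predicate (illustration only, not part of the patch; i386 constants
assumed):

#include <stdio.h>

#define PGDIR_SIZE 0x400000UL           /* i386: one pgd slot spans 4 MB */
#define PGDIR_MASK (~(PGDIR_SIZE - 1))

/* Can the pmd page(s) for the pgd slots covered by the zapped range
 * [start, end) be freed?  Only if the neighbours stay clear of them:
 * the previous vma ends at or below the first slot base, and the next
 * vma starts at or above the last slot top.  The patch applies this
 * per slot, plus the analogous PMD_SIZE test one level down. */
static int pgd_slot_freeable(unsigned long start, unsigned long end,
                             unsigned long prevend, unsigned long nextstart)
{
    return (start & PGDIR_MASK) >= prevend &&
           ((end + PGDIR_SIZE - 1) & PGDIR_MASK) <= nextstart;
}

int main(void)
{
    /* Hole [8 MB, 12 MB), neighbours at 4 MB and 16 MB: freeable. */
    printf("%d\n", pgd_slot_freeable(0x800000, 0xc00000,
                                     0x400000, 0x1000000));
    /* Hole [9 MB, 11 MB), but the previous vma ends at 9 MB, inside
     * the same 4 MB slot: the pmd page has to stay. */
    printf("%d\n", pgd_slot_freeable(0x900000, 0xb00000,
                                     0x900000, 0x1000000));
    return 0;
}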

--- linux-2.2.6/mm/memory.c.jj Tue Apr 13 01:18:26 1999
+++ linux-2.2.6/mm/memory.c Fri Apr 23 16:54:03 1999
@@ -71,45 +71,6 @@ void oom(struct task_struct * task)
force_sig(SIGKILL, task);
}

-/*
- * Note: this doesn't free the actual pages themselves. That
- * has been handled earlier when unmapping all the memory regions.
- */
-static inline void free_one_pmd(pmd_t * dir)
-{
- pte_t * pte;
-
- if (pmd_none(*dir))
- return;
- if (pmd_bad(*dir)) {
- printk("free_one_pmd: bad directory entry %08lx\n", pmd_val(*dir));
- pmd_clear(dir);
- return;
- }
- pte = pte_offset(dir, 0);
- pmd_clear(dir);
- pte_free(pte);
-}
-
-static inline void free_one_pgd(pgd_t * dir)
-{
- int j;
- pmd_t * pmd;
-
- if (pgd_none(*dir))
- return;
- if (pgd_bad(*dir)) {
- printk("free_one_pgd: bad directory entry %08lx\n", pgd_val(*dir));
- pgd_clear(dir);
- return;
- }
- pmd = pmd_offset(dir, 0);
- pgd_clear(dir);
- for (j = 0; j < PTRS_PER_PMD ; j++)
- free_one_pmd(pmd+j);
- pmd_free(pmd);
-}
-
/* Low and high watermarks for page table cache.
The system should try to have pgt_water[0] <= cache elements <= pgt_water[1]
*/
@@ -121,27 +82,6 @@ int check_pgt_cache(void)
return do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]);
}

-
-/*
- * This function clears all user-level page tables of a process - this
- * is needed by execve(), so that old pages aren't in the way.
- */
-void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
-{
- pgd_t * page_dir = mm->pgd;
-
- if (page_dir && page_dir != swapper_pg_dir) {
- page_dir += first;
- do {
- free_one_pgd(page_dir);
- page_dir++;
- } while (--nr);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
- }
-}
-
/*
* This function just free's the page directory - the
* pages tables themselves have been freed earlier by
@@ -322,81 +262,114 @@ static inline void forget_pte(pte_t page
}
}

-static inline int zap_pte_range(pmd_t * pmd, unsigned long address, unsigned long size)
+/*
+ * remove user pages in a given range.
+ *
+ * 21Apr99 Merged into one routine from several inline routines to reduce
+ * variable count and make things faster.
+ * Implement early pgtable freeing. -jj
+ */
+void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size,
+ unsigned long prevend, unsigned long nextstart, int flags)
{
+ pgd_t * pgd;
+ pmd_t * pmd;
pte_t * pte;
- int freed;
+ unsigned long end = address + size;
+ int freed = 0;

- if (pmd_none(*pmd))
- return 0;
- if (pmd_bad(*pmd)) {
- printk("zap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
- pmd_clear(pmd);
- return 0;
- }
- pte = pte_offset(pmd, address);
- address &= ~PMD_MASK;
- if (address + size > PMD_SIZE)
- size = PMD_SIZE - address;
- size >>= PAGE_SHIFT;
- freed = 0;
+ pgd = pgd_offset(mm, address)-1;
+
for (;;) {
- pte_t page;
- if (!size)
- break;
- page = *pte;
- pte++;
- size--;
- if (pte_none(page))
+ pgd++;
+
+ /* zap_pmd_range */
+ if (pgd_none(*pgd))
+ goto skip_zap_pmd_range;
+ if (pgd_bad(*pgd)) {
+ printk("zap_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
+ pgd_clear(pgd);
+skip_zap_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ if (address >= end)
+ goto out2;
continue;
- pte_clear(pte-1);
- freed += free_pte(page);
- }
- return freed;
-}
+ }

-static inline int zap_pmd_range(pgd_t * dir, unsigned long address, unsigned long size)
-{
- pmd_t * pmd;
- unsigned long end;
- int freed;
+ pmd = pmd_offset(pgd, address);
+
+ do {
+ /* zap_pte_range */
+ if (pmd_none(*pmd))
+ goto skip_zap_pte_range;
+ if (pmd_bad(*pmd)) {
+ printk("zap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
+ pmd_clear(pmd);
+skip_zap_pte_range: address = (address + PMD_SIZE) & PMD_MASK;
+ if (address >= end) {
+ if ((flags & ZAP_RANGE_CLEARPTE) &&
+ ((address - 1) & PGDIR_MASK) >= prevend &&
+ ((end + PGDIR_SIZE - 1) & PGDIR_MASK) <= nextstart) {
+ pgd_clear(pgd);
+ pmd_free((pmd_t *)((unsigned long)pmd & ~PMD_TABLE_MASK));
+ }
+ goto out2;
+ }
+ goto cont_zap_pmd_range;
+ }

- if (pgd_none(*dir))
- return 0;
- if (pgd_bad(*dir)) {
- printk("zap_pmd_range: bad pgd (%08lx)\n", pgd_val(*dir));
- pgd_clear(dir);
- return 0;
- }
- pmd = pmd_offset(dir, address);
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- freed = 0;
- do {
- freed += zap_pte_range(pmd, address, end - address);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address < end);
- return freed;
-}
+ pte = pte_offset(pmd, address);

-/*
- * remove user pages in a given range.
- */
-void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
-{
- pgd_t * dir;
- unsigned long end = address + size;
- int freed = 0;
+ do {
+ pte_t page = *pte;

- dir = pgd_offset(mm, address);
- while (address < end) {
- freed += zap_pmd_range(dir, address, end - address);
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
+ if (pte_none(page))
+ goto cont_zap_pte_range;
+ pte_clear(pte);
+ freed += free_pte(page);
+
+cont_zap_pte_range: address += PAGE_SIZE;
+ if (address >= end)
+ goto out;
+ pte++;
+ } while ((unsigned long)pte & PTE_TABLE_MASK);
+ if (address - PMD_SIZE >= prevend &&
+ (flags & ZAP_RANGE_CLEARPTE)) {
+ pmd_clear(pmd);
+ pte_free((pte_t *)((unsigned long)pte - (PTRS_PER_PTE * sizeof(pte_t))));
+ }
+
+cont_zap_pmd_range: pmd++;
+ } while ((unsigned long)pmd & PMD_TABLE_MASK);
+ if (address - PGDIR_SIZE >= prevend &&
+ (flags & ZAP_RANGE_CLEARPTE)) {
+ pgd_clear(pgd);
+ pmd_free((pmd_t *)((unsigned long)pmd - (PTRS_PER_PMD * sizeof(pmd_t))));
+ }
}
+
+out: if (flags & ZAP_RANGE_CLEARPTE) {
+ if (((address - 1) & PMD_MASK) >= prevend &&
+ ((end + PMD_SIZE - 1) & PMD_MASK) <= nextstart) {
+ pmd_clear(pmd);
+ pte_free((pte_t *)((unsigned long)pte & ~PTE_TABLE_MASK));
+ }
+ if (((address - 1) & PGDIR_MASK) >= prevend &&
+ ((end + PGDIR_SIZE - 1) & PGDIR_MASK) <= nextstart) {
+ pgd_clear(pgd);
+ pmd_free((pmd_t *)((unsigned long)pmd & ~PMD_TABLE_MASK));
+ }
+ }
+out2: if (flags & ZAP_RANGE_FLUSHTLB) {
+ address = (end - size) & PMD_MASK;
+ if (address < prevend)
+ address += PMD_SIZE;
+ end = (end + PMD_SIZE - 1) & PMD_MASK;
+ if (end > nextstart)
+ end -= PMD_SIZE;
+ if (address < end)
+ flush_tlb_pgtables(mm, address, end);
+ }
+
/*
* Update rss for the mm_struct (not necessarily current->mm)
*/
@@ -751,7 +724,8 @@ void vmtruncate(struct inode * inode, un
/* mapping wholly truncated? */
if (mpnt->vm_offset >= offset) {
flush_cache_range(mm, start, end);
- zap_page_range(mm, start, len);
+ zap_page_range(mm, start, len,
+ start, start + len, 0);
flush_tlb_range(mm, start, end);
continue;
}
@@ -767,7 +741,8 @@ void vmtruncate(struct inode * inode, un
start = (start + ~PAGE_MASK) & PAGE_MASK;
}
flush_cache_range(mm, start, end);
- zap_page_range(mm, start, len);
+ zap_page_range(mm, start, len,
+ start, start + len, 0);
flush_tlb_range(mm, start, end);
} while ((mpnt = mpnt->vm_next_share) != NULL);
}
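
A note on the merged zap_page_range above: the inner loops use the
pte/pmd pointer itself as the termination condition. Page tables are
naturally aligned, so "(unsigned long)pte & PTE_TABLE_MASK" becomes
zero exactly when the pointer steps off the end of the table, and the
base for pte_free()/pmd_free() is recovered by masking (from a pointer
inside the table) or by subtracting the table size (from one past the
end). A userspace sketch of the trick (illustration only; i386-like
values assumed, 1024 four-byte entries per 4 KB table):

#include <stdio.h>
#include <stdlib.h>

typedef unsigned int pte_t;             /* 4-byte entry, as on i386 */
#define PTRS_PER_PTE   1024
#define PTE_TABLE_SIZE 4096UL
#define PTE_TABLE_MASK (PTE_TABLE_SIZE - 1)

int main(void)
{
    /* A naturally aligned stand-in for a pte table. */
    pte_t *table = aligned_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
    pte_t *pte = table + 1000;          /* enter mid-table, as zap can */
    int stepped = 0;

    do {
        /* ...a real zap would inspect and clear *pte here... */
        stepped++;
        pte++;
    } while ((unsigned long)pte & PTE_TABLE_MASK);

    /* pte is one past the end; both recovery forms yield the base. */
    printf("stepped %d, bases ok: %d\n", stepped,
           pte - PTRS_PER_PTE == table &&
           (pte_t *)((unsigned long)(pte - 1) & ~PTE_TABLE_MASK) == table);
    free(table);
    return 0;
}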
--- linux-2.2.6/mm/mmap.c.jj Tue Apr 13 06:35:15 1999
+++ linux-2.2.6/mm/mmap.c Fri Apr 23 16:54:03 1999
@@ -115,7 +115,7 @@ asmlinkage unsigned long sys_brk(unsigne

/* Always allow shrinking brk. */
if (brk <= mm->brk) {
- if (!do_munmap(newbrk, oldbrk-newbrk))
+ if (!do_munmap(newbrk, oldbrk-newbrk, DO_MUNMAP_CLEARPTE))
goto set_brk;
goto out;
}
@@ -175,7 +175,6 @@ unsigned long do_mmap(struct file * file
struct mm_struct * mm = current->mm;
struct vm_area_struct * vma;
int error;
-
if ((len = PAGE_ALIGN(len)) == 0)
return addr;

@@ -284,26 +283,26 @@ unsigned long do_mmap(struct file * file

/* Clear old maps */
error = -ENOMEM;
- if (do_munmap(addr, len))
+ if (do_munmap(addr, len, 0))
goto free_vma;

/* Check against address space limit. */
if ((mm->total_vm << PAGE_SHIFT) + len
> current->rlim[RLIMIT_AS].rlim_cur)
- goto free_vma;
+ goto unmap_and_free_vma;

/* Private writable mapping? Check memory availability.. */
if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
!(flags & MAP_NORESERVE) &&
!vm_enough_memory(len >> PAGE_SHIFT))
- goto free_vma;
+ goto unmap_and_free_vma;

if (file) {
int correct_wcount = 0;
if (vma->vm_flags & VM_DENYWRITE) {
if (file->f_dentry->d_inode->i_writecount > 0) {
error = -ETXTBSY;
- goto free_vma;
+ goto unmap_and_free_vma;
}
/* f_op->mmap might possibly sleep
* (generic_file_mmap doesn't, but other code
@@ -328,7 +327,7 @@ unsigned long do_mmap(struct file * file
* after the call. Save the values we need now ...
*/
flags = vma->vm_flags;
- addr = vma->vm_start; /* can addr have changed?? */
+ addr = vma->vm_start; /* can addr have changed?? Yes. -jj */
insert_vm_struct(mm, vma);
merge_segments(mm, vma->vm_start, vma->vm_end);

@@ -340,10 +339,30 @@ unsigned long do_mmap(struct file * file
return addr;

unmap_and_free_vma:
- /* Undo any partial mapping done by a device driver. */
- flush_cache_range(mm, vma->vm_start, vma->vm_end);
- zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
- flush_tlb_range(mm, vma->vm_start, vma->vm_end);
+ /* Undo any partial mapping done by a device driver.
+ * If we are past a successful do_munmap, then we have to
+ * free the page tables associated with it, since in the
+ * common case of remapping a new area over an old
+ * one we don't clear the page tables. -jj */
+ {
+ struct vm_area_struct * prev, * next;
+ unsigned long prevend, nextstart;
+ unsigned long start, end;
+
+ start = vma->vm_start;
+ if (addr < start) start = addr;
+ end = vma->vm_end;
+ if (addr + len < end) end = addr + len;
+
+ next = find_vma_prev(mm, vma->vm_start, &prev);
+ prevend = prev ? prev->vm_end : 0;
+ nextstart = next ? next->vm_start : USER_PTRS_PER_PGD * PGDIR_SIZE;
+
+ flush_cache_range(mm, start, end);
+ zap_page_range(mm, start, end - start, prevend, nextstart,
+ ZAP_RANGE_CLEARPTE|ZAP_RANGE_FLUSHTLB);
+ flush_tlb_range(mm, start, end);
+ }
free_vma:
kmem_cache_free(vm_area_cachep, vma);
return error;
@@ -467,131 +486,25 @@ struct vm_area_struct * find_vma_prev(st
return NULL;
}

-/* Normal function to fix up a mapping
- * This function is the default for when an area has no specific
- * function. This may be used as part of a more specific routine.
- * This function works out what part of an area is affected and
- * adjusts the mapping information. Since the actual page
- * manipulation is done in do_mmap(), none need be done here,
- * though it would probably be more appropriate.
- *
- * By the time this function is called, the area struct has been
- * removed from the process mapping list, so it needs to be
- * reinserted if necessary.
- *
- * The 4 main cases are:
- * Unmapping the whole area
- * Unmapping from the start of the segment to a point in it
- * Unmapping from an intermediate point to the end
- * Unmapping between to intermediate points, making a hole.
- *
- * Case 4 involves the creation of 2 new areas, for each side of
- * the hole. If possible, we reuse the existing area rather than
- * allocate a new one, and the return indicates whether the old
- * area was reused.
- */
-static struct vm_area_struct * unmap_fixup(struct vm_area_struct *area,
- unsigned long addr, size_t len, struct vm_area_struct *extra)
-{
- struct vm_area_struct *mpnt;
- unsigned long end = addr + len;
-
- area->vm_mm->total_vm -= len >> PAGE_SHIFT;
- if (area->vm_flags & VM_LOCKED)
- area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
-
- /* Unmapping the whole area. */
- if (addr == area->vm_start && end == area->vm_end) {
- if (area->vm_ops && area->vm_ops->close)
- area->vm_ops->close(area);
- if (area->vm_file)
- fput(area->vm_file);
- kmem_cache_free(vm_area_cachep, area);
- return extra;
- }
-
- /* Work out to one of the ends. */
- if (end == area->vm_end)
- area->vm_end = addr;
- else if (addr == area->vm_start) {
- area->vm_offset += (end - area->vm_start);
- area->vm_start = end;
- } else {
- /* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
- /* Add end mapping -- leave beginning for below */
- mpnt = extra;
- extra = NULL;
-
- mpnt->vm_mm = area->vm_mm;
- mpnt->vm_start = end;
- mpnt->vm_end = area->vm_end;
- mpnt->vm_page_prot = area->vm_page_prot;
- mpnt->vm_flags = area->vm_flags;
- mpnt->vm_ops = area->vm_ops;
- mpnt->vm_offset = area->vm_offset + (end - area->vm_start);
- mpnt->vm_file = area->vm_file;
- mpnt->vm_pte = area->vm_pte;
- if (mpnt->vm_file)
- mpnt->vm_file->f_count++;
- if (mpnt->vm_ops && mpnt->vm_ops->open)
- mpnt->vm_ops->open(mpnt);
- area->vm_end = addr; /* Truncate area */
- insert_vm_struct(current->mm, mpnt);
- }
-
- insert_vm_struct(current->mm, area);
- return extra;
-}
-
-/*
- * Try to free as many page directory entries as we can,
- * without having to work very hard at actually scanning
- * the page tables themselves.
- *
- * Right now we try to free page tables if we have a nice
- * PGDIR-aligned area that got free'd up. We could be more
- * granular if we want to, but this is fast and simple,
- * and covers the bad cases.
- *
- * "prev", if it exists, points to a vma before the one
- * we just free'd - but there's no telling how much before.
- */
-static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
- unsigned long start, unsigned long end)
-{
- unsigned long first = start & PGDIR_MASK;
- unsigned long last = (end + PGDIR_SIZE - 1) & PGDIR_MASK;
-
- if (!prev) {
- prev = mm->mmap;
- if (!prev)
- goto no_mmaps;
- if (prev->vm_end > start) {
- if (last > prev->vm_start)
- last = prev->vm_start;
- goto no_mmaps;
- }
- }
- for (;;) {
- struct vm_area_struct *next = prev->vm_next;
-
- if (next) {
- if (next->vm_start < start) {
- prev = next;
- continue;
- }
- if (last > next->vm_start)
- last = next->vm_start;
- }
- if (prev->vm_end > first)
- first = prev->vm_end + PGDIR_SIZE - 1;
- break;
- }
-no_mmaps:
- first = first >> PGDIR_SHIFT;
- last = last >> PGDIR_SHIFT;
- if (last > first)
- clear_page_tables(mm, first, last-first);
+static void unmap_hole_fixup(struct vm_area_struct *mpnt,
+ struct vm_area_struct *extra,
+ unsigned long addr, unsigned long end)
+{
+ extra->vm_mm = mpnt->vm_mm;
+ extra->vm_start = end;
+ extra->vm_end = mpnt->vm_end;
+ extra->vm_page_prot = mpnt->vm_page_prot;
+ extra->vm_flags = mpnt->vm_flags;
+ extra->vm_ops = mpnt->vm_ops;
+ extra->vm_offset = mpnt->vm_offset + (end - mpnt->vm_start);
+ extra->vm_file = mpnt->vm_file;
+ extra->vm_pte = mpnt->vm_pte;
+ if (extra->vm_file)
+ extra->vm_file->f_count++;
+ if (extra->vm_ops && extra->vm_ops->open)
+ extra->vm_ops->open(extra);
+ mpnt->vm_end = addr; /* Truncate area */
+ insert_vm_struct(current->mm, extra);
}

/* Munmap is split into 2 main parts -- this part which finds
@@ -599,10 +512,11 @@ no_mmaps:
* work. This now handles partial unmappings.
* Jeremy Fitzhardine <jeremy@sw.oz.au>
*/
-int do_munmap(unsigned long addr, size_t len)
+int do_munmap(unsigned long addr, size_t len, int flags)
{
struct mm_struct * mm;
struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;
+ unsigned long prevend, nextstart;

if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
return -EINVAL;
@@ -638,6 +552,12 @@ int do_munmap(unsigned long addr, size_t
return -ENOMEM;

npp = (prev ? &prev->vm_next : &mm->mmap);
+
+ if (mpnt->vm_start < addr)
+ prevend = addr;
+ else
+ prevend = prev ? prev->vm_end : 0;
+
free = NULL;
for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
*npp = mpnt->vm_next;
@@ -646,6 +566,8 @@ int do_munmap(unsigned long addr, size_t
if (mm->mmap_avl)
avl_remove(mpnt, &mm->mmap_avl);
}
+
+ nextstart = mpnt ? mpnt->vm_start : USER_PTRS_PER_PGD * PGDIR_SIZE;

/* Ok - we have the memory areas we should free on the 'free' list,
* so release them, and unmap the page range..
@@ -654,6 +576,7 @@ int do_munmap(unsigned long addr, size_t
*/
while ((mpnt = free) != NULL) {
unsigned long st, end, size;
+ unsigned long nextst;

free = free->vm_next;

@@ -661,29 +584,67 @@ int do_munmap(unsigned long addr, size_t
end = addr+len;
end = end > mpnt->vm_end ? mpnt->vm_end : end;
size = end - st;
+ nextst = (free != NULL || end != mpnt->vm_end) ?
+ end : nextstart;

if (mpnt->vm_ops && mpnt->vm_ops->unmap)
mpnt->vm_ops->unmap(mpnt, st, size);

remove_shared_vm_struct(mpnt);
mm->map_count--;
-
+
flush_cache_range(mm, st, end);
- zap_page_range(mm, st, size);
+ zap_page_range(mm, st, size, prevend, nextst, flags);
flush_tlb_range(mm, st, end);

/*
* Fix the mapping, and free the old area if it wasn't reused.
+ * The 4 main cases are:
+ * Unmapping the whole area
+ * Unmapping from the start of the segment to a point in it
+ * Unmapping from an intermediate point to the end
+ * Unmapping between two intermediate points, making a hole.
+ *
+ * Case 4 involves the creation of 2 new areas, for each side of
+ * the hole. If possible, we reuse the existing area rather than
+ * allocate a new one.
+ *
+ * We handle the common cases in this routine and let
+ * case 4 be handled out of line.
*/
- extra = unmap_fixup(mpnt, st, size, extra);
+
+ mpnt->vm_mm->total_vm -= size >> PAGE_SHIFT;
+ if (mpnt->vm_flags & VM_LOCKED)
+ mpnt->vm_mm->locked_vm -= size >> PAGE_SHIFT;
+
+ /* Unmapping the whole area. */
+ if (st == mpnt->vm_start && end == mpnt->vm_end) {
+ if (mpnt->vm_ops && mpnt->vm_ops->close)
+ mpnt->vm_ops->close(mpnt);
+ if (mpnt->vm_file)
+ fput(mpnt->vm_file);
+ kmem_cache_free(vm_area_cachep, mpnt);
+ } else {
+ /* Work out to one of the ends. */
+ if (end == mpnt->vm_end)
+ mpnt->vm_end = st;
+ else if (st == mpnt->vm_start) {
+ mpnt->vm_offset += (end - mpnt->vm_start);
+ mpnt->vm_start = end;
+ } else {
+ /* Unmapping a hole: mpnt->vm_start < st <= end < mpnt->vm_end */
+ /* Add end mapping -- leave beginning for below */
+ unmap_hole_fixup(mpnt, extra, st, end);
+ extra = NULL;
+ }
+ insert_vm_struct(current->mm, mpnt);
+ }
}

/* Release the extra vma struct if it wasn't used */
if (extra)
kmem_cache_free(vm_area_cachep, extra);

- free_pgtables(mm, prev, addr, addr+len);
-
mm->mmap_cache = NULL; /* Kill the cache. */
return 0;
}
@@ -694,7 +655,7 @@ asmlinkage int sys_munmap(unsigned long

down(&current->mm->mmap_sem);
lock_kernel();
- ret = do_munmap(addr, len);
+ ret = do_munmap(addr, len, DO_MUNMAP_CLEARPTE);
unlock_kernel();
up(&current->mm->mmap_sem);
return ret;
@@ -734,7 +695,9 @@ void exit_mmap(struct mm_struct * mm)
}
mm->map_count--;
remove_shared_vm_struct(mpnt);
- zap_page_range(mm, start, size);
+ zap_page_range(mm, start, size, 0, next ? next->vm_start :
+ USER_PTRS_PER_PGD * PGDIR_SIZE,
+ ZAP_RANGE_CLEARPTE);
if (mpnt->vm_file)
fput(mpnt->vm_file);
kmem_cache_free(vm_area_cachep, mpnt);
@@ -744,8 +707,6 @@ void exit_mmap(struct mm_struct * mm)
/* This is just debugging */
if (mm->map_count)
printk("exit_mmap: map count is %d\n", mm->map_count);
-
- clear_page_tables(mm, 0, USER_PTRS_PER_PGD);
}

/* Insert vm structure into process list sorted by address
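
For reference, the case analysis that replaced unmap_fixup() above,
stripped of the kernel types (illustration only; the enum names are
mine, not the patch's):

#include <stdio.h>

enum unmap_case { WHOLE, TAIL, HEAD, HOLE };

static enum unmap_case classify(unsigned long vm_start, unsigned long vm_end,
                                unsigned long st, unsigned long end)
{
    if (st == vm_start && end == vm_end)
        return WHOLE;           /* case 1: free the vma outright */
    if (end == vm_end)
        return TAIL;            /* case 3: truncate, vm_end = st */
    if (st == vm_start)
        return HEAD;            /* case 2: advance, vm_start = end */
    return HOLE;                /* case 4: split via unmap_hole_fixup() */
}

int main(void)
{
    static const char *name[] = { "whole", "tail", "head", "hole" };

    /* vma [0x1000, 0x9000), unmap [0x3000, 0x5000): punches a hole. */
    printf("%s\n", name[classify(0x1000, 0x9000, 0x3000, 0x5000)]);
    return 0;
}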
--- linux-2.2.6/mm/mremap.c.jj Sun Nov 22 18:38:19 1998
+++ linux-2.2.6/mm/mremap.c Fri Apr 23 16:54:03 1999
@@ -88,6 +88,8 @@ static int move_page_tables(struct mm_st
unsigned long new_addr, unsigned long old_addr, unsigned long len)
{
unsigned long offset = len;
+ unsigned long prevend, nextstart;
+ struct vm_area_struct * vma, * prev;

flush_cache_range(mm, old_addr, old_addr + len);
flush_tlb_range(mm, old_addr, old_addr + len);
@@ -112,15 +114,20 @@ static int move_page_tables(struct mm_st
* the old page tables)
*/
oops_we_failed:
+ vma = find_vma_prev(mm, new_addr, &prev);
+ prevend = prev ? prev->vm_end : 0;
+ nextstart = vma ? vma->vm_start : USER_PTRS_PER_PGD * PGDIR_SIZE;
+
flush_cache_range(mm, new_addr, new_addr + len);
while ((offset += PAGE_SIZE) < len)
move_one_page(mm, new_addr + offset, old_addr + offset);
- zap_page_range(mm, new_addr, new_addr + len);
+ zap_page_range(mm, new_addr, new_addr + len, prevend, nextstart,
+ ZAP_RANGE_CLEARPTE|ZAP_RANGE_FLUSHTLB);
flush_tlb_range(mm, new_addr, new_addr + len);
return -1;
}

-static inline unsigned long move_vma(struct vm_area_struct * vma,
+static inline unsigned long move_vma(struct vm_area_struct * vma,
unsigned long addr, unsigned long old_len, unsigned long new_len)
{
struct vm_area_struct * new_vma;
@@ -140,7 +147,7 @@ static inline unsigned long move_vma(str
new_vma->vm_ops->open(new_vma);
insert_vm_struct(current->mm, new_vma);
merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end);
- do_munmap(addr, old_len);
+ do_munmap(addr, old_len, DO_MUNMAP_CLEARPTE);
current->mm->total_vm += new_len >> PAGE_SHIFT;
if (new_vma->vm_flags & VM_LOCKED) {
current->mm->locked_vm += new_len >> PAGE_SHIFT;
@@ -178,7 +185,7 @@ asmlinkage unsigned long sys_mremap(unsi
*/
ret = addr;
if (old_len >= new_len) {
- do_munmap(addr+new_len, old_len - new_len);
+ do_munmap(addr+new_len, old_len - new_len, DO_MUNMAP_CLEARPTE);
goto out;
}
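
The prevend/nextstart pair that zap_page_range() now wants comes
straight from find_vma_prev(), as in the oops_we_failed path above.
A toy sketch of the derivation (illustration only; the plain list walk
stands in for the real find_vma_prev(), and the user-space top is an
assumed i386 value):

#include <stdio.h>

struct vma { unsigned long vm_start, vm_end; struct vma *vm_next; };

/* First vma with addr < vm_end; its predecessor goes into *pprev. */
static struct vma *find_vma_prev(struct vma *mmap, unsigned long addr,
                                 struct vma **pprev)
{
    struct vma *v;

    *pprev = NULL;
    for (v = mmap; v; *pprev = v, v = v->vm_next)
        if (addr < v->vm_end)
            return v;
    return NULL;
}

int main(void)
{
    struct vma b = { 0x8000, 0xa000, NULL };
    struct vma a = { 0x1000, 0x3000, &b };
    struct vma *prev, *next = find_vma_prev(&a, 0x5000, &prev);
    unsigned long user_top = 0xc0000000UL;      /* assumed i386 top */
    unsigned long prevend = prev ? prev->vm_end : 0;
    unsigned long nextstart = next ? next->vm_start : user_top;

    /* prints prevend=0x3000 nextstart=0x8000 */
    printf("prevend=%#lx nextstart=%#lx\n", prevend, nextstart);
    return 0;
}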

--- linux-2.2.6/include/linux/mm.h.jj Fri Apr 16 23:28:11 1999
+++ linux-2.2.6/include/linux/mm.h Fri Apr 23 16:54:03 1999
@@ -285,7 +285,10 @@ extern void free_page_tables(struct mm_s
extern void clear_page_tables(struct mm_struct *, unsigned long, int);
extern int new_page_tables(struct task_struct * tsk);

-extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
+#define ZAP_RANGE_CLEARPTE 1
+#define ZAP_RANGE_FLUSHTLB 2
+extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size,
+ unsigned long prevend, unsigned long nextstart, int flags);
extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
@@ -313,7 +316,8 @@ extern unsigned long get_unmapped_area(u

extern unsigned long do_mmap(struct file *, unsigned long, unsigned long,
unsigned long, unsigned long, unsigned long);
-extern int do_munmap(unsigned long, size_t);
+#define DO_MUNMAP_CLEARPTE (ZAP_RANGE_CLEARPTE|ZAP_RANGE_FLUSHTLB)
+extern int do_munmap(unsigned long, size_t, int);

/* filemap.c */
extern void remove_inode_page(struct page *);
@@ -370,6 +374,7 @@ static inline int expand_stack(struct vm

/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
+struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, struct vm_area_struct **pprev);

/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
NULL if none. Assume start_addr < end_addr. */
--- linux-2.2.6/include/asm-i386/pgtable.h.jj Fri Apr 16 23:28:08 1999
+++ linux-2.2.6/include/asm-i386/pgtable.h Fri Apr 23 16:54:03 1999
@@ -74,6 +74,12 @@ static inline void flush_tlb_range(struc
__flush_tlb();
}

+extern inline void flush_tlb_pgtables(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ /* ia32 has hw page tables. */
+}
+
#else

/*
--- linux-2.2.6/include/asm-mips/pgtable.h.jj Thu Mar 11 03:11:51 1999
+++ linux-2.2.6/include/asm-mips/pgtable.h Fri Apr 23 16:54:03 1999
@@ -47,6 +47,10 @@ extern void (*flush_tlb_range)(struct mm
unsigned long end);
extern void (*flush_tlb_page)(struct vm_area_struct *vma, unsigned long page);

+extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+}
+
/*
* - add_wired_entry() add a fixed TLB entry, and move wired register
*/
--- linux-2.2.6/include/asm-alpha/pgtable.h.jj Thu Mar 11 03:11:51 1999
+++ linux-2.2.6/include/asm-alpha/pgtable.h Fri Apr 23 16:54:03 1999
@@ -187,6 +187,13 @@ extern void flush_tlb_range(struct mm_st

#endif /* __SMP__ */

+extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ /* Although Alpha uses VPTE caches, this can be a nop, as Alpha does
+ not have fine-grained TLB flushing, so it will flush the VPTE
+ entries during the next flush_tlb_range. */
+}
+
/* Certain architectures need to do special things when PTEs
* within a page table are directly modified. Thus, the following
* hook is made available.
--- linux-2.2.6/include/asm-m68k/pgtable.h.jj Thu Mar 11 03:11:51 1999
+++ linux-2.2.6/include/asm-m68k/pgtable.h Fri Apr 23 16:54:03 1999
@@ -235,6 +235,10 @@ extern inline void flush_tlb_kernel_page
__asm__ __volatile__("pflush #4,#4,(%0)" : : "a" (addr));
}

+extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+}
+
/* Certain architectures need to do special things when pte's
* within a page table are directly modified. Thus, the following
* hook is made available.
--- linux-2.2.6/include/asm-sparc/pgtable.h.jj Thu Mar 11 01:53:37 1999
+++ linux-2.2.6/include/asm-sparc/pgtable.h Fri Apr 23 16:54:03 1999
@@ -472,6 +472,11 @@ BTFIXUPDEF_CALL(void, flush_tlb_page, st
#define flush_tlb_range(mm,start,end) BTFIXUP_CALL(flush_tlb_range)(mm,start,end)
#define flush_tlb_page(vma,addr) BTFIXUP_CALL(flush_tlb_page)(vma,addr)

+extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ /* Sparc32 has hw page tables. */
+}
+
BTFIXUPDEF_CALL(void, flush_page_to_ram, unsigned long)
BTFIXUPDEF_CALL(void, flush_sig_insns, struct mm_struct *, unsigned long)

--- linux-2.2.6/include/asm-ppc/pgtable.h.jj Thu Mar 11 06:30:32 1999
+++ linux-2.2.6/include/asm-ppc/pgtable.h Fri Apr 23 16:54:03 1999
@@ -20,6 +20,11 @@ extern void local_flush_tlb_range(struct
#define flush_tlb_page local_flush_tlb_page
#define flush_tlb_range local_flush_tlb_range

+extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ /* PPC has hw page tables. */
+}
+
/*
* No cache flushing is required when address mappings are
* changed, because the caches on PowerPCs are physically
--- linux-2.2.6/include/asm-sparc64/pgtable.h.jj Sun Mar 28 19:07:47 1999
+++ linux-2.2.6/include/asm-sparc64/pgtable.h Fri Apr 23 16:54:03 1999
@@ -248,6 +248,17 @@ extern __inline__ void flush_tlb_page(st

#endif

+/* This will change for Cheetah and later chips. */
+#define VPTE_BASE 0xfffffffe00000000
+
+extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ flush_tlb_range(mm,
+ VPTE_BASE + (((long)start) >> (PAGE_SHIFT - 3)),
+ VPTE_BASE + (((long)end) >> (PAGE_SHIFT - 3)));
+}
+
#define mk_pte(page, pgprot) (__pte(__pa(page) | pgprot_val(pgprot)))
#define mk_pte_phys(physpage, pgprot) (__pte((physpage) | pgprot_val(pgprot)))
#define pte_modify(_pte, newprot) \
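
The sparc64 shift deserves a word: ptes there are 8 bytes and each
maps one PAGE_SIZE page, so the linear ("virtual") pte for address va
sits at VPTE_BASE + (va >> PAGE_SHIFT) * 8, which is the same as the
VPTE_BASE + (va >> (PAGE_SHIFT - 3)) form the hunk uses. A quick
check (sketch only; sparc64's 8 KB pages, i.e. PAGE_SHIFT 13,
assumed):

#include <stdio.h>

#define PAGE_SHIFT 13
#define VPTE_BASE  0xfffffffe00000000UL

int main(void)
{
    unsigned long va = 0x20000UL;       /* page index 16 */

    /* prints 1: the two forms agree */
    printf("%d\n",
           VPTE_BASE + (va >> PAGE_SHIFT) * 8 ==
           VPTE_BASE + (va >> (PAGE_SHIFT - 3)));
    return 0;
}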
--- linux-2.2.6/include/asm-arm/pgtable.h.jj Thu Mar 11 03:11:51 1999
+++ linux-2.2.6/include/asm-arm/pgtable.h Fri Apr 23 16:54:03 1999
@@ -9,6 +9,10 @@

extern int do_check_pgt_cache(int, int);

+extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+}
+
/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
#define PageSkip(page) (0)
#define kern_addr_valid(addr) (1)
--- linux-2.2.6/ipc/shm.c.jj Mon Apr 12 23:27:26 1999
+++ linux-2.2.6/ipc/shm.c Fri Apr 23 16:54:03 1999
@@ -397,7 +397,7 @@ static int shm_map (struct vm_area_struc
unsigned long tmp;

/* clear old mappings */
- do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
+ do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start, DO_MUNMAP_CLEARPTE);

/* add new mapping */
tmp = shmd->vm_end - shmd->vm_start;
@@ -581,7 +581,7 @@ asmlinkage int sys_shmdt (char *shmaddr)
shmdnext = shmd->vm_next;
if (shmd->vm_ops == &shm_vm_ops
&& shmd->vm_start - shmd->vm_offset == (ulong) shmaddr)
- do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start);
+ do_munmap(shmd->vm_start, shmd->vm_end - shmd->vm_start, DO_MUNMAP_CLEARPTE);
}
unlock_kernel();
up(&current->mm->mmap_sem);
--- linux-2.2.6/drivers/char/mem.c.jj Mon Apr 12 19:35:41 1999
+++ linux-2.2.6/drivers/char/mem.c Fri Apr 23 16:54:03 1999
@@ -339,7 +339,7 @@ static inline size_t read_zero_pagealign
count = size;

flush_cache_range(mm, addr, addr + count);
- zap_page_range(mm, addr, count);
+ zap_page_range(mm, addr, count, addr, addr + count, 0);
zeromap_page_range(addr, count, PAGE_COPY);
flush_tlb_range(mm, addr, addr + count);

--- linux-2.2.6/arch/sparc/kernel/sys_sunos.c.jj Tue Oct 27 18:52:20 1998
+++ linux-2.2.6/arch/sparc/kernel/sys_sunos.c Fri Apr 23 16:54:03 1999
@@ -173,7 +173,7 @@ asmlinkage int sunos_brk(unsigned long b
*/
if (brk <= current->mm->brk) {
current->mm->brk = brk;
- do_munmap(newbrk, oldbrk-newbrk);
+ do_munmap(newbrk, oldbrk-newbrk, DO_MUNMAP_CLEARPTE);
goto out;
}
/*
--- linux-2.2.6/arch/mips/kernel/sysirix.c.jj Fri Mar 12 08:25:14 1999
+++ linux-2.2.6/arch/mips/kernel/sysirix.c Fri Apr 23 16:54:03 1999
@@ -551,7 +551,7 @@ asmlinkage int irix_brk(unsigned long br
*/
if (brk <= current->mm->brk) {
mm->brk = brk;
- do_munmap(newbrk, oldbrk-newbrk);
+ do_munmap(newbrk, oldbrk-newbrk, DO_MUNMAP_CLEARPTE);
ret = 0;
goto out;
}
--- linux-2.2.6/arch/sparc64/kernel/sys_sunos32.c.jj Tue Oct 27 18:52:20 1998
+++ linux-2.2.6/arch/sparc64/kernel/sys_sunos32.c Fri Apr 23 16:54:03 1999
@@ -147,7 +147,7 @@ asmlinkage int sunos_brk(u32 baddr)
/* Always allow shrinking brk. */
if (brk <= current->mm->brk) {
current->mm->brk = brk;
- do_munmap(newbrk, oldbrk-newbrk);
+ do_munmap(newbrk, oldbrk-newbrk, DO_MUNMAP_CLEARPTE);
goto out;
}
/* Check against rlimit and stack.. */

Cheers,
Jakub
___________________________________________________________________
Jakub Jelinek | jj@sunsite.mff.cuni.cz | http://sunsite.mff.cuni.cz
Administrator of SunSITE Czech Republic, MFF, Charles University
___________________________________________________________________
UltraLinux | http://ultra.linux.cz/ | http://ultra.penguin.cz/
Linux version 2.2.5 on a sparc64 machine (3958.37 BogoMips)
___________________________________________________________________
