preliminary patch for 2.1.103 mmap.c

Bill Hawes (whawes@star.net)
Sat, 30 May 1998 12:05:02 -0400


I'm planning to correct some long-standing problems in the memory
mapping code by providing protection for manipulating the vma lists.
Currently only the merge_segments routine uses the mmap semaphore, which
leaves open a number of races between observing and changing the vma
lists. This has been discussed previously on the kernel list, but as 2.2
is getting close, I think we really need to get this area fixed.
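
To make the exposure concrete, here is one interleaving that is possible
today with clone(CLONE_VM) threads (a hypothetical trace, only to
illustrate the race):

        /*
         * thread A (do_munmap)            thread B (same mm)
         * --------------------            ------------------
         * finds the first affected vma
         * blocks in kmem_cache_alloc()
         *                                 changes the vma list (nothing
         *                                 but merge_segments takes the
         *                                 mmap semaphore)
         * unlinks vmas starting from a
         * now-stale pointer
         */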

The attached patch reworks the code for the mmap and munmap cases by
acquiring the mmap semaphore before looking at the vma lists. It first
allocates the vma structs that may be needed (one or two), then takes
the semaphore before completing the tests and changing the lists. (Note
that we need to get the semaphore before even looking at the lists, as
otherwise a blocking operation might allow the lists to change
underneath us.)
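
In outline, the ordering in do_mmap() then becomes roughly the following
(condensed from the patch below, with most of the error handling trimmed):

        /* Allocate first; kmem_cache_alloc can block, and the lists
         * must not be examined until the semaphore is held. */
        vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
        if (!vma)
                return -ENOMEM;
        extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
        /* extra may be NULL; it's only needed if unmapping creates a hole */

        down(&mm->mmap_sem);
        /* ... permission checks, do_munmap_locked() to clear old maps,
         * insert_vm_struct() and merge_segments_locked() ... */
        up(&mm->mmap_sem);
        return addr;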

Additional work needs to be done for the other memory mapping calls
(mlock, mprotect, etc.), but I'd like to get some feedback on this
approach before proceeding. The patch appears to work OK (I'm running it
now), but will need further testing.
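
For those other calls the plan would be the same pattern -- take the
semaphore around the vma walk and any fixups. A rough sketch for mprotect
(not part of this patch, just to show the shape):

        asmlinkage int sys_mprotect(unsigned long start, size_t len,
                unsigned long prot)
        {
                int error = 0;

                down(&current->mm->mmap_sem);
                /* ... the existing argument checks and the
                 * mprotect_fixup() calls go here, unchanged,
                 * setting error as needed ... */
                up(&current->mm->mmap_sem);
                return error;
        }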

The patch also corrects a problem in munmap introduced when the
MAX_MAP_COUNT checks were added. The test for creating a hole when the
map count is already at the maximum was being made too late; if that
error exit is taken, the vma lists are left in an incorrect state.
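
Concretely, the hole test is now made near the top of do_munmap_locked(),
before any vma has been unlinked (condensed from the patch below; extent
is addr + len):

        /* Will the unmap punch a hole in an existing mapping? If so,
         * make sure the spare vma is there and the map count has room
         * before touching the lists. */
        if (mpnt->vm_start < addr && extent < mpnt->vm_end) {
                error = -ENOMEM;
                if (!extra)
                        goto out_free;
                if (mm->map_count > MAX_MAP_COUNT)
                        goto out_free;
        }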

Comments and suggestions welcome ...

Regards,
Bill
[Attachment: mmap_103-patch]

--- linux-2.1.103/mm/mmap.c.old Sat Mar 7 11:32:19 1998
+++ linux-2.1.103/mm/mmap.c Sat May 30 11:12:43 1998
@@ -48,6 +48,10 @@

int sysctl_overcommit_memory;

+static int do_munmap_locked(unsigned long, size_t, struct vm_area_struct *);
+static void merge_segments_locked(struct mm_struct *, unsigned long,
+ unsigned long);
+
/* Check that a process has enough memory to allocate a
* new virtual mapping.
*/
@@ -86,6 +90,18 @@
}
}

+/*
+ * Unlink from the share list and free a vma struct.
+ */
+static inline void free_vma(struct mm_struct *mm, struct vm_area_struct *mpnt)
+{
+ mm->map_count--;
+ remove_shared_vm_struct(mpnt);
+ if (mpnt->vm_file)
+ fput(mpnt->vm_file);
+ kmem_cache_free(vm_area_cachep, mpnt);
+}
+
asmlinkage unsigned long sys_brk(unsigned long brk)
{
unsigned long rlim, retval;
@@ -161,7 +177,7 @@
unsigned long prot, unsigned long flags, unsigned long off)
{
struct mm_struct * mm = current->mm;
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * extra;
int correct_wcount = 0, error;

if ((len = PAGE_ALIGN(len)) == 0)
@@ -186,53 +202,66 @@
return -EAGAIN;
}

+ /*
+ * We need at least one vma, and may need an extra one if the
+ * unmap creates a hole. Get them now, before acquiring the semaphore.
+ */
+ error = -ENOMEM;
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!vma)
+ goto out;
+ extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ /* (the extra vma is checked later, only if a hole is created.) */
+
+ down(&mm->mmap_sem);
+
/* Do simple checking here so the lower-level routines won't have
- * to. we assume access permissions have been handled by the open
+ * to. We assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
+ error = -EINVAL;
if (file != NULL) {
switch (flags & MAP_TYPE) {
case MAP_SHARED:
+ error = -EACCES;
if ((prot & PROT_WRITE) && !(file->f_mode & 2))
- return -EACCES;
+ goto out_free;

- /* make sure there are no mandatory locks on the file. */
+ /* Verify there are no mandatory locks on the file. */
+ error = -EAGAIN;
if (locks_verify_locked(file->f_dentry->d_inode))
- return -EAGAIN;
+ goto out_free;
/* fall through */
case MAP_PRIVATE:
+ error = -EACCES;
if (!(file->f_mode & 1))
- return -EACCES;
+ goto out_free;
break;

default:
- return -EINVAL;
+ goto out_free;
}
} else if ((flags & MAP_TYPE) != MAP_PRIVATE)
- return -EINVAL;
+ goto out_free;

- /* Obtain the address to map to. we verify (or select) it and ensure
+ /* Obtain the address to map to. We verify (or select) it and ensure
* that it represents a valid section of the address space.
*/
if (flags & MAP_FIXED) {
+ error = -EINVAL;
if (addr & ~PAGE_MASK)
- return -EINVAL;
+ goto out_free;
} else {
+ error = -ENOMEM;
addr = get_unmapped_area(addr, len);
if (!addr)
- return -ENOMEM;
+ goto out_free;
}

/* Determine the object being mapped and call the appropriate
* specific mapper. the address has already been validated, but
* not unmapped, but the maps are removed from the list.
*/
- if (file && (!file->f_op || !file->f_op->mmap))
- return -ENODEV;
-
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!vma)
- return -ENOMEM;

vma->vm_mm = mm;
vma->vm_start = addr;
@@ -240,6 +269,10 @@
vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;

if (file) {
+ error = -ENODEV;
+ if (!file->f_op || !file->f_op->mmap)
+ goto out_free;
+
if (file->f_mode & 1)
vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_SHARED) {
@@ -266,20 +299,21 @@
vma->vm_pte = 0;

/* Clear old maps */
- error = -ENOMEM;
- if (do_munmap(addr, len))
- goto free_vma;
+ error = do_munmap_locked(addr, len, extra);
+ extra = NULL; /* do_munmap_locked used or freed the spare vma */
+ if (error)
+ goto out_free;

+ error = -ENOMEM;
/* Check against address space limit. */
if ((mm->total_vm << PAGE_SHIFT) + len
> current->rlim[RLIMIT_AS].rlim_cur)
- goto free_vma;
+ goto out_free;

/* Private writable mapping? Check memory availability.. */
if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
!(flags & MAP_NORESERVE) &&
!vm_enough_memory(len >> PAGE_SHIFT))
- goto free_vma;
+ goto out_free;

error = 0;
if (file) {
@@ -298,13 +332,12 @@
}
if (!error)
error = file->f_op->mmap(file, vma);
-
}
/* Fix up the count if necessary, then check for an error */
if (correct_wcount)
file->f_dentry->d_inode->i_writecount++;
if (error)
- goto free_vma;
+ goto out_free;

/*
* merge_segments may merge our vma, so we can't refer to it
@@ -313,10 +346,14 @@
flags = vma->vm_flags;
addr = vma->vm_start; /* can addr have changed?? */
insert_vm_struct(mm, vma);
- merge_segments(mm, vma->vm_start, vma->vm_end);
+ merge_segments_locked(mm, vma->vm_start, vma->vm_end);
+ /*
+ * Safe to unlock now ... all done manipulating vma lists.
+ */
+ up(&mm->mmap_sem);

mm->total_vm += len >> PAGE_SHIFT;
- if ((flags & VM_LOCKED) && !(flags & VM_IO)) {
+ if ((flags & (VM_LOCKED | VM_IO)) == VM_LOCKED) {
unsigned long start = addr;
mm->locked_vm += len >> PAGE_SHIFT;
do {
@@ -329,8 +366,13 @@
}
return addr;

-free_vma:
+out_free:
+ up(&mm->mmap_sem);
+
kmem_cache_free(vm_area_cachep, vma);
+ if (extra)
+ kmem_cache_free(vm_area_cachep, extra);
+out:
return error;
}

@@ -442,94 +484,77 @@
return 1;
}

-asmlinkage int sys_munmap(unsigned long addr, size_t len)
-{
- int ret;
-
- lock_kernel();
- ret = do_munmap(addr, len);
- unlock_kernel();
- return ret;
-}
-
-/* Munmap is split into 2 main parts -- this part which finds
- * what needs doing, and the areas themselves, which do the
- * work. This now handles partial unmappings.
- * Jeremy Fitzhardine <jeremy@sw.oz.au>
+/*
+ * Find the areas that need to be unmapped.
+ *
+ * Note: called with the mmap semaphore held. The extra vma
+ * has been allocated but not checked.
*/
-int do_munmap(unsigned long addr, size_t len)
+static int do_munmap_locked(unsigned long addr, size_t len,
+ struct vm_area_struct *extra)
{
- struct mm_struct * mm;
- struct vm_area_struct *mpnt, *next, *free, *extra;
- int freed;
-
- if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
- return -EINVAL;
-
- if ((len = PAGE_ALIGN(len)) == 0)
- return 0;
+ struct mm_struct * mm = current->mm;
+ unsigned long extent = addr + len;
+ struct vm_area_struct *mpnt, *free;
+ int error;

- /* Check if this memory area is ok - put it on the temporary
- * list if so.. The checks here are pretty simple --
- * every area affected in some way (by any overlap) is put
- * on the list. If nothing is put on, nothing is affected.
+ /*
+ * Find the first area potentially affected by the unmapping.
*/
- mm = current->mm;
mpnt = mm->mmap;
while(mpnt && mpnt->vm_end <= addr)
mpnt = mpnt->vm_next;
if (!mpnt)
- return 0;
+ goto out_done;

/*
- * We may need one additional vma to fix up the mappings ...
- * and this is the last chance for an easy error exit.
+ * Check whether the unmapping will create a hole,
+ * and if so make sure we can handle it.
*/
- extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!extra)
- return -ENOMEM;
-
- next = mpnt->vm_next;
+ if (mpnt->vm_start < addr && extent < mpnt->vm_end) {
+ error = -ENOMEM;
+ if (!extra)
+ goto out_free;
+ if (mm->map_count > MAX_MAP_COUNT)
+ goto out_free;
+ }

- /* we have mpnt->vm_next = next and addr < mpnt->vm_end */
+ /*
+ * Check if this memory area is ok - put it on the temporary
+ * list if so.. The checks here are pretty simple --
+ * every area affected in some way (by any overlap) is put
+ * on the list. If nothing is put on, nothing is affected.
+ *
+ * Note: invariants mpnt->vm_next = next and addr < mpnt->vm_end
+ */
free = NULL;
- for ( ; mpnt && mpnt->vm_start < addr+len; ) {
+ while (mpnt && mpnt->vm_start < extent) {
struct vm_area_struct *next = mpnt->vm_next;

- if(mpnt->vm_next)
- mpnt->vm_next->vm_pprev = mpnt->vm_pprev;
- *mpnt->vm_pprev = mpnt->vm_next;
+ if (next)
+ next->vm_pprev = mpnt->vm_pprev;
+ *mpnt->vm_pprev = next;

mpnt->vm_next = free;
free = mpnt;
mpnt = next;
}

- if (free && (free->vm_start < addr) && (free->vm_end > addr+len)) {
- if (mm->map_count > MAX_MAP_COUNT) {
- kmem_cache_free(vm_area_cachep, extra);
- return -ENOMEM;
- }
- }
-
/* Ok - we have the memory areas we should free on the 'free' list,
* so release them, and unmap the page range..
* If the one of the segments is only being partially unmapped,
* it will put new vm_area_struct(s) into the address space.
*/
- freed = 0;
while ((mpnt = free) != NULL) {
unsigned long st, end, size;

free = free->vm_next;
- freed = 1;

mm->map_count--;
remove_shared_vm_struct(mpnt);

st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
- end = addr+len;
- end = end > mpnt->vm_end ? mpnt->vm_end : end;
+ end = extent > mpnt->vm_end ? mpnt->vm_end : extent;
size = end - st;

if (mpnt->vm_ops && mpnt->vm_ops->unmap)
@@ -544,15 +569,57 @@
*/
if (!unmap_fixup(mpnt, st, size, &extra))
kmem_cache_free(vm_area_cachep, mpnt);
+ mm->mmap_cache = NULL; /* Kill the cache. */
}
+out_done:
+ error = 0;

+out_free:
/* Release the extra vma struct if it wasn't used */
if (extra)
kmem_cache_free(vm_area_cachep, extra);

- if (freed)
- mm->mmap_cache = NULL; /* Kill the cache. */
- return 0;
+ return error;
+}
+
+/*
+ * Munmap is split into two main parts: do_munmap_locked, which
+ * finds what needs doing, and unmap_fixup, which does the work.
+ * This now handles partial unmappings.
+ * Jeremy Fitzhardine <jeremy@sw.oz.au>
+ */
+int do_munmap(unsigned long addr, size_t len)
+{
+ struct mm_struct * mm = current->mm;
+ struct vm_area_struct *extra;
+ int error;
+
+ if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
+ return -EINVAL;
+
+ if ((len = PAGE_ALIGN(len)) == 0)
+ return 0;
+
+ /*
+ * We may need one additional vma to fix up the mappings ...
+ * get it now before acquiring the semaphore.
+ */
+ extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+
+ down(&mm->mmap_sem);
+ error = do_munmap_locked(addr, len, extra);
+ up(&mm->mmap_sem);
+ return error;
+}
+
+asmlinkage int sys_munmap(unsigned long addr, size_t len)
+{
+ int ret;
+
+ lock_kernel();
+ ret = do_munmap(addr, len);
+ unlock_kernel();
+ return ret;
}

/* Release all mmaps. */
@@ -625,17 +692,29 @@
}
}

-/* Merge the list of memory segments if possible.
- * Redundant vm_area_structs are freed.
- * This assumes that the list is ordered by address.
- * We don't need to traverse the entire list, only those segments
- * which intersect or are adjacent to a given interval.
+/*
+ * Externally visible entry point; callers must not hold the semaphore.
*/
-void merge_segments (struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
+void merge_segments (struct mm_struct * mm, unsigned long start_addr,
+ unsigned long end_addr)
{
- struct vm_area_struct *prev, *mpnt, *next;
-
down(&mm->mmap_sem);
+ merge_segments_locked(mm, start_addr, end_addr);
+ up(&mm->mmap_sem);
+}
+
+/*
+ * Merge the list of memory segments if possible, freeing any redundant
+ * vm_area_structs. (This assumes that the list is ordered by address.)
+ * We don't need to traverse the entire list, only those segments that
+ * intersect or are adjacent to a given interval.
+ *
+ * Note: the caller must hold the mmap semaphore.
+ */
+static void merge_segments_locked(struct mm_struct * mm,
+ unsigned long start_addr, unsigned long end_addr)
+{
+ struct vm_area_struct *prev, *mpnt, *next;

prev = NULL;
mpnt = mm->mmap;
@@ -644,7 +723,7 @@
mpnt = mpnt->vm_next;
}
if (!mpnt)
- goto no_vma;
+ return;

next = mpnt->vm_next;

@@ -692,16 +771,10 @@
mpnt->vm_start = mpnt->vm_end;
mpnt->vm_ops->close(mpnt);
}
- mm->map_count--;
- remove_shared_vm_struct(mpnt);
- if (mpnt->vm_file)
- fput(mpnt->vm_file);
- kmem_cache_free(vm_area_cachep, mpnt);
+ free_vma(mm, mpnt);
mpnt = prev;
}
mm->mmap_cache = NULL; /* Kill the cache. */
-no_vma:
- up(&mm->mmap_sem);
}

__initfunc(void vma_init(void))
