patch for 2.1.104 mmap.c

Bill Hawes (whawes@transmeta.com)
Sat, 06 Jun 1998 11:19:27 -0700


Hi Linus,

The attached patch adds mmap semaphore protection to the mmap and munmap
operations in mmap.c. The code is structured so that the required vma
structs are allocated before acquiring the mmap semaphore, and then all
of the vma manipulations are done while holding the semaphore.
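To make the structure concrete, here is the flow in rough outline (a
simplified sketch of the patched do_mmap, with the error paths and the
actual mapping work omitted):

	/* Allocate before taking the semaphore -- may sleep. */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!vma)
		return -ENOMEM;
	/* Possible extra vma, in case clearing old maps splits an area. */
	extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);

	down(&mm->mmap_sem);
	/* ... limit checks, do_munmap_locked(), insert_vm_struct(),
	 *     merge_segments_locked() ... */
	up(&mm->mmap_sem);

do_munmap follows the same pattern with just the single extra vma.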

The patch also corrects a couple of bugs. In munmap, the test for
map_count exceeding the maximum was made only after the vma list had
already been modified, so an error exit would leave the list in an
inconsistent state. In do_mmap, locked_vm was incremented only if
VM_LOCKED was set but VM_IO was not; elsewhere in the kernel only the
VM_LOCKED flag is tested.
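To make the second fix concrete, the accounting test changes roughly as
follows (condensed from the corresponding hunk in the patch):

	/* before: locked_vm only counted when VM_IO was not set */
	if ((flags & VM_LOCKED) && !(flags & VM_IO))
		mm->locked_vm += len >> PAGE_SHIFT;

	/* after: counted whenever VM_LOCKED, matching the rest of the kernel */
	if (flags & VM_LOCKED)
		mm->locked_vm += len >> PAGE_SHIFT;

	/* the page-touching loop still skips VM_IO mappings */
	if ((flags & (VM_LOCKED | VM_IO)) == VM_LOCKED) {
		/* ... fault in each page with get_user() ... */
	}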

This patch is a first step toward protecting all of the mmap operations
with the semaphore, and is important for ensuring that memory mapping
operations are thread-safe.

Regards,
Bill

[Attachment: mmap_104-patch]

--- linux-2.1.104/include/linux/mm.h.old Wed Apr 8 22:17:13 1998
+++ linux-2.1.104/include/linux/mm.h Fri Jun 5 19:12:14 1998
@@ -300,7 +300,9 @@
/* mmap.c */
extern void vma_init(void);
-extern unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags, unsigned long off);
+extern unsigned long do_mmap(struct file *, unsigned long, size_t,
+ unsigned long, unsigned long, unsigned long);
extern void merge_segments(struct mm_struct *, unsigned long, unsigned long);
+extern void merge_segments_locked(struct mm_struct *, unsigned long,
+ unsigned long);
extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void exit_mmap(struct mm_struct *);
--- linux-2.1.104/mm/mmap.c.old Fri Feb 27 14:02:06 1998
+++ linux-2.1.104/mm/mmap.c Fri Jun 5 16:58:13 1998
@@ -48,6 +48,8 @@

int sysctl_overcommit_memory;

+static int do_munmap_locked(unsigned long, size_t, struct vm_area_struct **);
+
/* Check that a process has enough memory to allocate a
* new virtual mapping.
*/
@@ -86,6 +88,18 @@
}
}

+/*
+ * Unlink from the share list and free a vma struct.
+ */
+static inline void free_vma(struct mm_struct *mm, struct vm_area_struct *mpnt)
+{
+ mm->map_count--;
+ remove_shared_vm_struct(mpnt);
+ if (mpnt->vm_file)
+ fput(mpnt->vm_file);
+ kmem_cache_free(vm_area_cachep, mpnt);
+}
+
asmlinkage unsigned long sys_brk(unsigned long brk)
{
unsigned long rlim, retval;
@@ -157,11 +171,11 @@
#undef _trans
}

-unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
+unsigned long do_mmap(struct file * file, unsigned long addr, size_t len,
unsigned long prot, unsigned long flags, unsigned long off)
{
struct mm_struct * mm = current->mm;
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * extra;
int correct_wcount = 0, error;

if ((len = PAGE_ALIGN(len)) == 0)
@@ -174,10 +188,6 @@
if (off + len < off)
return -EINVAL;

- /* Too many mappings? */
- if (mm->map_count > MAX_MAP_COUNT)
- return -ENOMEM;
-
/* mlock MCL_FUTURE? */
if (mm->def_flags & VM_LOCKED) {
unsigned long locked = mm->locked_vm << PAGE_SHIFT;
@@ -186,53 +196,70 @@
return -EAGAIN;
}

+ /*
+ * We need at least one vma, and may need an extra one if we
+ * unmap a hole. Get them now before acquiring the semaphore.
+ */
+ error = -ENOMEM;
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!vma)
+ goto out;
+ extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ /* (allocation will be checked later) */
+
+ down(&mm->mmap_sem);
+
+ /* Too many mappings? */
+ if (mm->map_count > MAX_MAP_COUNT)
+ goto out_free;
+
/* Do simple checking here so the lower-level routines won't have
- * to. we assume access permissions have been handled by the open
+ * to. We assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
+ error = -EINVAL;
if (file != NULL) {
switch (flags & MAP_TYPE) {
case MAP_SHARED:
+ error = -EACCES;
if ((prot & PROT_WRITE) && !(file->f_mode & 2))
- return -EACCES;
+ goto out_free;

- /* make sure there are no mandatory locks on the file. */
+ /* Verify there are no mandatory locks on the file. */
+ error = -EAGAIN;
if (locks_verify_locked(file->f_dentry->d_inode))
- return -EAGAIN;
+ goto out_free;
/* fall through */
case MAP_PRIVATE:
+ error = -EACCES;
if (!(file->f_mode & 1))
- return -EACCES;
+ goto out_free;
break;

default:
- return -EINVAL;
+ goto out_free;
}
} else if ((flags & MAP_TYPE) != MAP_PRIVATE)
- return -EINVAL;
+ goto out_free;

- /* Obtain the address to map to. we verify (or select) it and ensure
+ /* Obtain the address to map to. We verify (or select) it and ensure
* that it represents a valid section of the address space.
*/
if (flags & MAP_FIXED) {
+ error = -EINVAL;
if (addr & ~PAGE_MASK)
- return -EINVAL;
+ goto out_free;
} else {
+ error = -ENOMEM;
addr = get_unmapped_area(addr, len);
if (!addr)
- return -ENOMEM;
+ goto out_free;
}

/* Determine the object being mapped and call the appropriate
- * specific mapper. the address has already been validated, but
+ * specific mapper. The address has already been validated, but
* not unmapped, but the maps are removed from the list.
*/
- if (file && (!file->f_op || !file->f_op->mmap))
- return -ENODEV;
-
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!vma)
- return -ENOMEM;

vma->vm_mm = mm;
vma->vm_start = addr;
@@ -240,6 +267,10 @@
vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;

if (file) {
+ error = -ENODEV;
+ if (!file->f_op || !file->f_op->mmap)
+ goto out_free;
+
if (file->f_mode & 1)
vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_SHARED) {
@@ -265,21 +296,22 @@
vma->vm_file = NULL;
vma->vm_pte = 0;

- /* Clear old maps */
- error = -ENOMEM;
- if (do_munmap(addr, len))
- goto free_vma;
+ /* Clear old maps. */
+ error = do_munmap_locked(addr, len, &extra);
+ if (error)
+ goto out_free;

+ error = -ENOMEM;
/* Check against address space limit. */
if ((mm->total_vm << PAGE_SHIFT) + len
> current->rlim[RLIMIT_AS].rlim_cur)
- goto free_vma;
+ goto out_free;

/* Private writable mapping? Check memory availability.. */
if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
!(flags & MAP_NORESERVE) &&
!vm_enough_memory(len >> PAGE_SHIFT))
- goto free_vma;
+ goto out_free;

error = 0;
if (file) {
@@ -298,13 +330,12 @@
}
if (!error)
error = file->f_op->mmap(file, vma);
-
}
/* Fix up the count if necessary, then check for an error */
if (correct_wcount)
file->f_dentry->d_inode->i_writecount++;
if (error)
- goto free_vma;
+ goto out_free;

/*
* merge_segments may merge our vma, so we can't refer to it
@@ -313,12 +344,18 @@
flags = vma->vm_flags;
addr = vma->vm_start; /* can addr have changed?? */
insert_vm_struct(mm, vma);
- merge_segments(mm, vma->vm_start, vma->vm_end);
-
+ merge_segments_locked(mm, vma->vm_start, vma->vm_end);
+
mm->total_vm += len >> PAGE_SHIFT;
- if ((flags & VM_LOCKED) && !(flags & VM_IO)) {
- unsigned long start = addr;
+ if (flags & VM_LOCKED)
mm->locked_vm += len >> PAGE_SHIFT;
+ /*
+ * Safe to unlock now ... all done manipulating vma lists.
+ */
+ up(&mm->mmap_sem);
+
+ if ((flags & (VM_LOCKED | VM_IO)) == VM_LOCKED) {
+ unsigned long start = addr;
do {
char c;
get_user(c,(char *) start);
@@ -329,8 +366,13 @@
}
return addr;

-free_vma:
+out_free:
+ up(&mm->mmap_sem);
+
+ if (extra)
+ kmem_cache_free(vm_area_cachep, extra);
kmem_cache_free(vm_area_cachep, vma);
+out:
return error;
}

@@ -384,12 +426,13 @@
static int unmap_fixup(struct vm_area_struct *area, unsigned long addr,
size_t len, struct vm_area_struct **extra)
{
+ struct mm_struct * mm = current->mm;
struct vm_area_struct *mpnt;
unsigned long end = addr + len;

- area->vm_mm->total_vm -= len >> PAGE_SHIFT;
+ mm->total_vm -= len >> PAGE_SHIFT;
if (area->vm_flags & VM_LOCKED)
- area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
+ mm->locked_vm -= len >> PAGE_SHIFT;

/* Unmapping the whole area. */
if (addr == area->vm_start && end == area->vm_end) {
@@ -425,7 +468,7 @@
if (mpnt->vm_ops && mpnt->vm_ops->open)
mpnt->vm_ops->open(mpnt);
area->vm_end = addr; /* Truncate area */
- insert_vm_struct(current->mm, mpnt);
+ insert_vm_struct(mm, mpnt);
}

/* Close the current area ... */
@@ -438,98 +481,82 @@
/* ... then reopen and reinsert. */
if (area->vm_ops && area->vm_ops->open)
area->vm_ops->open(area);
- insert_vm_struct(current->mm, area);
+ insert_vm_struct(mm, area);
return 1;
}

-asmlinkage int sys_munmap(unsigned long addr, size_t len)
-{
- int ret;
-
- lock_kernel();
- ret = do_munmap(addr, len);
- unlock_kernel();
- return ret;
-}
-
-/* Munmap is split into 2 main parts -- this part which finds
- * what needs doing, and the areas themselves, which do the
- * work. This now handles partial unmappings.
- * Jeremy Fitzhardine <jeremy@sw.oz.au>
+/*
+ * Find the areas that need to be unmapped. The extra vma
+ * has been allocated (but not checked), and the pointer
+ * will be cleared if the memory is used.
+ *
+ * Note: called with the mmap semaphore held.
*/
-int do_munmap(unsigned long addr, size_t len)
+static int do_munmap_locked(unsigned long addr, size_t len,
+ struct vm_area_struct **extra)
{
- struct mm_struct * mm;
- struct vm_area_struct *mpnt, *next, *free, *extra;
- int freed;
-
- if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
- return -EINVAL;
-
- if ((len = PAGE_ALIGN(len)) == 0)
- return 0;
+ struct mm_struct * mm = current->mm;
+ unsigned long extent = addr + len;
+ struct vm_area_struct *mpnt, *free;
+ int error;

- /* Check if this memory area is ok - put it on the temporary
- * list if so.. The checks here are pretty simple --
- * every area affected in some way (by any overlap) is put
- * on the list. If nothing is put on, nothing is affected.
+ /*
+ * Find the first area potentially affected by the unmapping.
*/
- mm = current->mm;
mpnt = mm->mmap;
while(mpnt && mpnt->vm_end <= addr)
mpnt = mpnt->vm_next;
if (!mpnt)
- return 0;
+ goto out_done;

/*
- * We may need one additional vma to fix up the mappings ...
- * and this is the last chance for an easy error exit.
+ * Check whether the unmapping will create a hole,
+ * and if so make sure we can handle it.
*/
- extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!extra)
- return -ENOMEM;
-
- next = mpnt->vm_next;
+ if (mpnt->vm_start < addr && extent < mpnt->vm_end) {
+ error = -ENOMEM;
+ if (!*extra)
+ goto out;
+ if (mm->map_count > MAX_MAP_COUNT)
+ goto out;
+ }

- /* we have mpnt->vm_next = next and addr < mpnt->vm_end */
+ /*
+ * Check if this memory area is ok - put it on the temporary
+ * list if so.. The checks here are pretty simple --
+ * every area affected in some way (by any overlap) is put
+ * on the list. If nothing is put on, nothing is affected.
+ *
+ * Note: invariants mpnt->vm_next = next and addr < mpnt->vm_end
+ */
free = NULL;
- for ( ; mpnt && mpnt->vm_start < addr+len; ) {
+ while (mpnt && mpnt->vm_start < extent) {
struct vm_area_struct *next = mpnt->vm_next;

- if(mpnt->vm_next)
- mpnt->vm_next->vm_pprev = mpnt->vm_pprev;
- *mpnt->vm_pprev = mpnt->vm_next;
+ if (next)
+ next->vm_pprev = mpnt->vm_pprev;
+ *mpnt->vm_pprev = next;

mpnt->vm_next = free;
free = mpnt;
mpnt = next;
}

- if (free && (free->vm_start < addr) && (free->vm_end > addr+len)) {
- if (mm->map_count > MAX_MAP_COUNT) {
- kmem_cache_free(vm_area_cachep, extra);
- return -ENOMEM;
- }
- }
-
/* Ok - we have the memory areas we should free on the 'free' list,
* so release them, and unmap the page range..
* If the one of the segments is only being partially unmapped,
* it will put new vm_area_struct(s) into the address space.
*/
- freed = 0;
while ((mpnt = free) != NULL) {
unsigned long st, end, size;

free = free->vm_next;
- freed = 1;

mm->map_count--;
remove_shared_vm_struct(mpnt);

st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
- end = addr+len;
- end = end > mpnt->vm_end ? mpnt->vm_end : end;
+ end = extent > mpnt->vm_end ? mpnt->vm_end : extent;
size = end - st;

if (mpnt->vm_ops && mpnt->vm_ops->unmap)
@@ -542,17 +569,58 @@
/*
* Fix the mapping, and free the old area if it wasn't reused.
*/
- if (!unmap_fixup(mpnt, st, size, &extra))
+ if (!unmap_fixup(mpnt, st, size, extra))
kmem_cache_free(vm_area_cachep, mpnt);
+ mm->mmap_cache = NULL; /* Kill the cache. */
}
+out_done:
+ error = 0;
+out:
+ return error;
+}
+
+/*
+ * Munmap is split into two main parts: do_munmap_locked, which
+ * finds what needs doing, and unmap_fixup, which does the work.
+ * This now handles partial unmappings.
+ * Jeremy Fitzhardine <jeremy@sw.oz.au>
+ */
+int do_munmap(unsigned long addr, size_t len)
+{
+ struct mm_struct * mm = current->mm;
+ struct vm_area_struct *extra;
+ int error;
+
+ if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
+ return -EINVAL;
+
+ if ((len = PAGE_ALIGN(len)) == 0)
+ return 0;
+
+ /*
+ * We may need one additional vma to fix up the mappings ...
+ * get it now before acquiring the semaphore.
+ */
+ extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+
+ down(&mm->mmap_sem);
+ error = do_munmap_locked(addr, len, &extra);
+ up(&mm->mmap_sem);

/* Release the extra vma struct if it wasn't used */
if (extra)
kmem_cache_free(vm_area_cachep, extra);
+ return error;
+}

- if (freed)
- mm->mmap_cache = NULL; /* Kill the cache. */
- return 0;
+asmlinkage int sys_munmap(unsigned long addr, size_t len)
+{
+ int ret;
+
+ lock_kernel();
+ ret = do_munmap(addr, len);
+ unlock_kernel();
+ return ret;
}

/* Release all mmaps. */
@@ -625,17 +693,29 @@
}
}

-/* Merge the list of memory segments if possible.
- * Redundant vm_area_structs are freed.
- * This assumes that the list is ordered by address.
- * We don't need to traverse the entire list, only those segments
- * which intersect or are adjacent to a given interval.
+/*
+ * Externally visible entry point when semaphore not held.
*/
-void merge_segments (struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
+void merge_segments (struct mm_struct * mm, unsigned long start_addr,
+ unsigned long end_addr)
{
- struct vm_area_struct *prev, *mpnt, *next;
-
down(&mm->mmap_sem);
+ merge_segments_locked(mm, start_addr, end_addr);
+ up(&mm->mmap_sem);
+}
+
+/*
+ * Merge the list of memory segments if possible, freeing any redundant
+ * vm_area_structs. (This assumes that the list is ordered by address.)
+ * We don't need to traverse the entire list, only those segments that
+ * intersect or are adjacent to a given interval.
+ *
+ * Note: the caller must hold the mmap semaphore.
+ */
+void merge_segments_locked(struct mm_struct * mm, unsigned long start_addr,
+ unsigned long end_addr)
+{
+ struct vm_area_struct *prev, *mpnt, *next;

prev = NULL;
mpnt = mm->mmap;
@@ -644,7 +724,7 @@
mpnt = mpnt->vm_next;
}
if (!mpnt)
- goto no_vma;
+ return;

next = mpnt->vm_next;

@@ -682,9 +762,9 @@
* big segment can possibly merge with the next one.
* The old unused mpnt is freed.
*/
- if(mpnt->vm_next)
- mpnt->vm_next->vm_pprev = mpnt->vm_pprev;
- *mpnt->vm_pprev = mpnt->vm_next;
+ if(next)
+ next->vm_pprev = mpnt->vm_pprev;
+ *mpnt->vm_pprev = next;

prev->vm_end = mpnt->vm_end;
if (mpnt->vm_ops && mpnt->vm_ops->close) {
@@ -692,16 +772,10 @@
mpnt->vm_start = mpnt->vm_end;
mpnt->vm_ops->close(mpnt);
}
- mm->map_count--;
- remove_shared_vm_struct(mpnt);
- if (mpnt->vm_file)
- fput(mpnt->vm_file);
- kmem_cache_free(vm_area_cachep, mpnt);
+ free_vma(mm, mpnt);
mpnt = prev;
}
mm->mmap_cache = NULL; /* Kill the cache. */
-no_vma:
- up(&mm->mmap_sem);
}

__initfunc(void vma_init(void))
