scaled back 2.1.50 dup_mmap patch

Bill Hawes (whawes@star.net)
Mon, 18 Aug 1997 19:08:02 -0400


This is a multi-part message in MIME format.
--------------B64C25A18B4F3FDC82D1E56D
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

I've made a less ambitious version of my dup_mmap patch for 2.1.50,
concentrating on fixing the memory, page count, and dentry leaks rather
than the race condition with clone processes changing the mmap. The
problem is that exiting early when copy_page_range fails doesn't free
the current vma, leaves its dentry reference count elevated, and leaves
some number of copied pages with elevated use counts.

The patch saves the return code from copy_page_range but defers checking
for failure until after the vma has been linked into the mmap list.
Linking first allows exit_mmap to do the cleanup; as far as I can tell,
exit_mmap shouldn't have any problem freeing resources from the
partially completed vma.

I tested the patch by inducing failures in pte_alloc, and things seemed
to work OK. Comments welcome ...

Regards,
Bill
--------------B64C25A18B4F3FDC82D1E56D
Content-Type: text/plain; charset=us-ascii; name="fork_50-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="fork_50-patch"

--- kernel/fork.c.old Thu Aug 14 18:44:06 1997
+++ kernel/fork.c Sat Aug 16 10:00:11 1997
@@ -206,6 +206,7 @@
static inline int dup_mmap(struct mm_struct * mm)
{
struct vm_area_struct * mpnt, *tmp, **pprev;
+ int retval;

mm->mmap = mm->mmap_cache = NULL;
flush_cache_mm(current->mm);
@@ -213,19 +214,17 @@
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
struct dentry *dentry;

+ retval = -ENOMEM;
tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!tmp) {
- exit_mmap(mm);
- flush_tlb_mm(current->mm);
- return -ENOMEM;
- }
+ if (!tmp)
+ goto fail_nomem;
*tmp = *mpnt;
tmp->vm_flags &= ~VM_LOCKED;
tmp->vm_mm = mm;
tmp->vm_next = NULL;
dentry = tmp->vm_dentry;
if (dentry) {
- dentry->d_count++;
+ dget(dentry);
if (tmp->vm_flags & VM_DENYWRITE)
dentry->d_inode->i_writecount--;

@@ -236,60 +235,82 @@
mpnt->vm_next_share = tmp;
tmp->vm_pprev_share = &mpnt->vm_next_share;
}
- if (copy_page_range(mm, current->mm, tmp)) {
- exit_mmap(mm);
- flush_tlb_mm(current->mm);
- return -ENOMEM;
- }
- if (tmp->vm_ops && tmp->vm_ops->open)
+ /* Copy the pages, but defer checking for errors */
+ retval = copy_page_range(mm, current->mm, tmp);
+#if 1
+if (retval)
+printk("dup_mmap: copy_page_range failed\n");
+#endif
+ if (!retval && tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);

- /* Ok, finally safe to link it in. */
+ /*
+ * Link in the new vma even if an error occurred,
+ * so that exit_mmap() can clean up the mess.
+ */
if((tmp->vm_next = *pprev) != NULL)
(*pprev)->vm_pprev = &tmp->vm_next;
*pprev = tmp;
tmp->vm_pprev = pprev;

pprev = &tmp->vm_next;
+ if (retval)
+ goto fail_nomem;
}
flush_tlb_mm(current->mm);
return 0;
+
+fail_nomem:
+ exit_mmap(mm);
+ flush_tlb_mm(current->mm);
+ return retval;
}

static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
- if (!(clone_flags & CLONE_VM)) {
- struct mm_struct * mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
- if (!mm)
- return -1;
- *mm = *current->mm;
- init_new_context(mm);
- mm->count = 1;
- mm->def_flags = 0;
- mm->mmap_sem = MUTEX;
-
- /* It has not run yet, so cannot be present in anyone's
- * cache or tlb.
- */
- mm->cpu_vm_mask = 0;
+ struct mm_struct * mm;
+ int retval;

- tsk->mm = mm;
- tsk->min_flt = tsk->maj_flt = 0;
- tsk->cmin_flt = tsk->cmaj_flt = 0;
- tsk->nswap = tsk->cnswap = 0;
- if (new_page_tables(tsk))
- goto free_mm;
- if (dup_mmap(mm)) {
- free_page_tables(mm);
-free_mm:
- kmem_cache_free(mm_cachep, mm);
- return -1;
- }
+ if (clone_flags & CLONE_VM) {
+ current->mm->count++;
+ SET_PAGE_DIR(tsk, current->mm->pgd);
return 0;
}
- current->mm->count++;
- SET_PAGE_DIR(tsk, current->mm->pgd);
+
+ retval = -ENOMEM;
+ mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
+ if (!mm)
+ goto fail_nomem;
+ *mm = *current->mm;
+ init_new_context(mm);
+ mm->count = 1;
+ mm->def_flags = 0;
+ mm->mmap_sem = MUTEX;
+
+ /* It has not run yet, so cannot be present in anyone's
+ * cache or tlb.
+ */
+ mm->cpu_vm_mask = 0;
+
+ tsk->mm = mm;
+ tsk->min_flt = tsk->maj_flt = 0;
+ tsk->cmin_flt = tsk->cmaj_flt = 0;
+ tsk->nswap = tsk->cnswap = 0;
+ retval = new_page_tables(tsk);
+ if (retval)
+ goto free_mm;
+ retval = dup_mmap(mm);
+ if (retval)
+ goto free_pt;
return 0;
+
+free_pt:
+ free_page_tables(mm);
+free_mm:
+ tsk->mm = NULL;
+ kmem_cache_free(mm_cachep, mm);
+fail_nomem:
+ return retval;
}

static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
@@ -478,9 +499,9 @@
bad_fork_cleanup_sighand:
exit_sighand(p);
bad_fork_cleanup_fs:
- exit_fs(p);
+ exit_fs(p); /* blocking */
bad_fork_cleanup_files:
- exit_files(p);
+ exit_files(p); /* blocking */
bad_fork_cleanup:
charge_uid(current, -1);
if (p->exec_domain && p->exec_domain->module)

--------------B64C25A18B4F3FDC82D1E56D--