[patch-2.3.99-pre7-2] SMP-safe vmalloc and friends.

From: Tigran Aivazian (tigran@veritas.com)
Date: Wed May 03 2000 - 07:26:33 EST


Hi Linus,

Here is the same patch but remade (and re-looked-at and retested) against
2.3.99-pre7-2. It adds a read-write spinlock, vmlist_lock, to guard access
to vmlist, and makes use of it in fs/proc/kcore.c (bug fix!) and in
ipc/util.c (optimization).
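
For reference, the locking discipline the patch establishes is, in sketch
form (all names are from the diff below):

        write_lock(&vmlist_lock);   /* get_vm_area(), vfree(): modify vmlist */
        /* ... splice a vm_struct in or out of the list ... */
        write_unlock(&vmlist_lock);

        read_lock(&vmlist_lock);    /* vread(), /proc/kcore: traversal only */
        /* ... walk the list without modifying it ... */
        read_unlock(&vmlist_lock);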

Please consider it.

Regards,
Tigran

diff -urN -X dontdiff linux/drivers/char/mem.c vmalloc/drivers/char/mem.c
--- linux/drivers/char/mem.c Wed Apr 12 09:09:20 2000
+++ vmalloc/drivers/char/mem.c Wed May 3 12:54:19 2000
@@ -231,7 +231,8 @@
 {
         unsigned long p = *ppos;
         ssize_t read = 0;
-        ssize_t virtr;
+        ssize_t virtr = 0;
+        char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
                 
         if (p < (unsigned long) high_memory) {
                 read = count;
@@ -258,11 +259,27 @@
                 count -= read;
         }
 
-        virtr = vread(buf, (char *)p, count);
-        if (virtr < 0)
-                return virtr;
-        *ppos += p + virtr;
-        return virtr + read;
+        kbuf = (char *)get_free_page(GFP_KERNEL);
+        if (!kbuf)
+                return -ENOMEM;
+        while (count > 0) {
+                int len = count;
+
+                if (len > PAGE_SIZE)
+                        len = PAGE_SIZE;
+                len = vread(kbuf, (char *)p, len); /* always >= 0 */
+                if (len && copy_to_user(buf, kbuf, len)) {
+                        free_page((unsigned long)kbuf);
+                        return -EFAULT;
+                }
+                count -= len;
+                buf += len;
+                virtr += len;
+                p += len;
+        }
+        free_page((unsigned long)kbuf);
+        *ppos += p + virtr;
+        return virtr + read;
 }
 
 /*
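
A note on the read_kmem() change above: vread() now takes vmlist_lock, and
copy_to_user() can fault and sleep, which is not allowed while a spinlock
is held, hence the staging through one kernel page at a time. Condensed
into a hypothetical helper (illustration only, not part of the patch):

        /* hypothetical helper, not in the patch: copy up to one page of
         * vmalloc'd data to userspace without holding vmlist_lock across
         * the potentially faulting copy_to_user(). */
        static ssize_t vread_page_to_user(char *ubuf, unsigned long p, size_t len)
        {
                char *kbuf = (char *)get_free_page(GFP_KERNEL); /* bounce page */
                ssize_t ret;

                if (!kbuf)
                        return -ENOMEM;
                if (len > PAGE_SIZE)
                        len = PAGE_SIZE;
                ret = vread(kbuf, (char *)p, len); /* vmlist_lock held only in here */
                if (ret && copy_to_user(ubuf, kbuf, ret)) /* may fault: no lock held */
                        ret = -EFAULT;
                free_page((unsigned long)kbuf);
                return ret;
        }
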
diff -urN -X dontdiff linux/fs/proc/kcore.c vmalloc/fs/proc/kcore.c
--- linux/fs/proc/kcore.c Sun Feb 27 04:33:07 2000
+++ vmalloc/fs/proc/kcore.c Wed May 3 12:54:19 2000
@@ -315,13 +315,12 @@
         size_t elf_buflen;
         int num_vma;
 
-        /* XXX we need to somehow lock vmlist between here
-         * and after elf_kcore_store_hdr() returns.
-         * For now assume that num_vma does not change (TA)
-         */
+        read_lock(&vmlist_lock);
         proc_root_kcore->size = size = get_kcore_size(&num_vma, &elf_buflen);
-        if (buflen == 0 || *fpos >= size)
+        if (buflen == 0 || *fpos >= size) {
+                read_unlock(&vmlist_lock);
                 return 0;
+        }
 
         /* trim buflen to not go beyond EOF */
         if (buflen > size - *fpos)
@@ -335,10 +334,13 @@
                 if (buflen < tsz)
                         tsz = buflen;
                 elf_buf = kmalloc(elf_buflen, GFP_ATOMIC);
-                if (!elf_buf)
+                if (!elf_buf) {
+                        read_unlock(&vmlist_lock);
                         return -ENOMEM;
+                }
                 memset(elf_buf, 0, elf_buflen);
                 elf_kcore_store_hdr(elf_buf, num_vma, elf_buflen);
+                read_unlock(&vmlist_lock);
                 if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
                         kfree(elf_buf);
                         return -EFAULT;
@@ -352,7 +354,8 @@
                 /* leave now if filled buffer already */
                 if (buflen == 0)
                         return acc;
-        }
+        } else
+                read_unlock(&vmlist_lock);
 
         /* where page 0 not mapped, write zeros into buffer */
 #if defined (__i386__) || defined (__mc68000__)
diff -urN -X dontdiff linux/include/linux/vmalloc.h vmalloc/include/linux/vmalloc.h
--- linux/include/linux/vmalloc.h Sat Mar 18 20:11:01 2000
+++ vmalloc/include/linux/vmalloc.h Wed May 3 13:08:55 2000
@@ -3,6 +3,7 @@
 
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/spinlock.h>
 
 #include <asm/pgtable.h>
 
@@ -24,6 +25,11 @@
 void vmfree_area_pages(unsigned long address, unsigned long size);
 int vmalloc_area_pages(unsigned long address, unsigned long size);
 
+/* vmlist_lock is a read-write spinlock that protects vmlist
+ * Used in mm/vmalloc.c (get_vm_area() and vfree()) and fs/proc/kcore.c.
+ */
+extern rwlock_t vmlist_lock;
+
 extern struct vm_struct * vmlist;
 #endif
 
diff -urN -X dontdiff linux/ipc/util.c vmalloc/ipc/util.c
--- linux/ipc/util.c Wed Mar 8 17:16:24 2000
+++ vmalloc/ipc/util.c Wed May 3 12:54:19 2000
@@ -159,25 +159,19 @@
 void* ipc_alloc(int size)
 {
         void* out;
-        if(size > PAGE_SIZE) {
-                lock_kernel();
+        if(size > PAGE_SIZE)
                 out = vmalloc(size);
-                unlock_kernel();
-        } else {
+        else
                 out = kmalloc(size, GFP_KERNEL);
-        }
         return out;
 }
 
 void ipc_free(void* ptr, int size)
 {
-        if(size > PAGE_SIZE) {
-                lock_kernel();
+        if(size > PAGE_SIZE)
                 vfree(ptr);
-                unlock_kernel();
-        } else {
+        else
                 kfree(ptr);
-        }
 }
 
 /*
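
Since vmalloc() and vfree() now serialize themselves on vmlist_lock (see
the mm/vmalloc.c hunks below), the lock_kernel()/unlock_kernel() pairs in
ipc/util.c become pure overhead. For clarity, the post-patch ipc_alloc()
reads:

        void* ipc_alloc(int size)
        {
                void* out;
                if(size > PAGE_SIZE)
                        out = vmalloc(size); /* takes vmlist_lock internally */
                else
                        out = kmalloc(size, GFP_KERNEL);
                return out;
        }
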
diff -urN -X dontdiff linux/mm/vmalloc.c vmalloc/mm/vmalloc.c
--- linux/mm/vmalloc.c Fri Mar 24 11:01:34 2000
+++ vmalloc/mm/vmalloc.c Wed May 3 13:09:07 2000
@@ -3,14 +3,17 @@
  *
  * Copyright (C) 1993 Linus Torvalds
  * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
  */
 
 #include <linux/malloc.h>
 #include <linux/vmalloc.h>
+#include <linux/spinlock.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
+rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
 struct vm_struct * vmlist = NULL;
 
 static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned long size)
@@ -163,11 +166,13 @@
         if (!area)
                 return NULL;
         addr = VMALLOC_START;
+        write_lock(&vmlist_lock);
         for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
                 if (size + addr < (unsigned long) tmp->addr)
                         break;
                 addr = tmp->size + (unsigned long) tmp->addr;
                 if (addr > VMALLOC_END-size) {
+                        write_unlock(&vmlist_lock);
                         kfree(area);
                         return NULL;
                 }
@@ -177,6 +182,7 @@
         area->size = size + PAGE_SIZE;
         area->next = *p;
         *p = area;
+        write_unlock(&vmlist_lock);
         return area;
 }
 
@@ -190,14 +196,17 @@
                 printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
                 return;
         }
+        write_lock(&vmlist_lock);
         for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
                 if (tmp->addr == addr) {
                         *p = tmp->next;
                         vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size);
                         kfree(tmp);
+                        write_unlock(&vmlist_lock);
                         return;
                 }
         }
+        write_unlock(&vmlist_lock);
         printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", addr);
 }
 
@@ -235,6 +244,7 @@
         if ((unsigned long) addr + count < count)
                 count = -(unsigned long) addr;
 
+        read_lock(&vmlist_lock);
         for (tmp = vmlist; tmp; tmp = tmp->next) {
                 vaddr = (char *) tmp->addr;
                 if (addr >= vaddr + tmp->size - PAGE_SIZE)
@@ -242,7 +252,7 @@
                 while (addr < vaddr) {
                         if (count == 0)
                                 goto finished;
-                        put_user('\0', buf);
+                        *buf = '\0';
                         buf++;
                         addr++;
                         count--;
@@ -251,12 +261,13 @@
                 do {
                         if (count == 0)
                                 goto finished;
-                        put_user(*addr, buf);
+                        *buf = *addr;
                         buf++;
                         addr++;
                         count--;
                 } while (--n > 0);
         }
 finished:
+        read_unlock(&vmlist_lock);
         return buf - buf_start;
 }
