[patch-2.4.0-test8-pre1] vwrite() support for /dev/kmem

From: Tigran Aivazian (tigran@veritas.com)
Date: Thu Aug 31 2000 - 09:33:20 EST


Hi Linus,

Now that it is clear that I was completely mistaken and (mis)quoted Alan
Cox when I said "he thinks there are security implications", I am
re-sending the vwrite patch against 2.4.0-test8-pre1. Yes, I remember I
promised to rewrite both vread/vwrite (as they are both ugly), but as they
say in Russian, "better a little bird (e.g. a sparrow) in hand than a big
one (e.g. a swan) in the sky" :)

Regards,
Tigran

PS. While we are at it, I attached some trivial cleanups of mm/memory.c
and mm/vmscan.c.
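
To spell out the cleanups: the mm/memory.c one just drops a redundant
"= NULL" from the definition of mem_map. A minimal illustration of why the
initializer is redundant (not part of the patch): objects with static
storage duration are zero-initialized by the C standard anyway, and with
the compilers of the day the explicit initializer typically forced the
pointer into .data instead of zero-filled .bss:

	mem_map_t * mem_map = NULL;	/* explicit zero: usually placed in .data */
	mem_map_t * mem_map;		/* same semantics, lives in .bss */

The mm/vmscan.c cleanups cache vma->vm_mm in a local variable "mm" once
per function instead of re-reading vma->vm_mm at every use.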

---------- Forwarded message ----------
Date: Thu, 31 Aug 2000 14:48:21 +0100 (BST)
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
To: Tigran Aivazian <tigran@veritas.com>
Cc: Chris Evans <chris@scary.beasts.org>,
     Matthew Kirkwood <matthew@hairy.beasts.org>, Andi Kleen <ak@suse.de>,
     Alan Cox <alan@lxorguk.ukuu.org.uk>,
     Arnaldo Carvalho de Melo <acme@conectiva.com.br>,
     Richard Gooch <rgooch@atnf.csiro.au>, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] mtrr: s/suser/capable/

> speaking of /dev/kmem, I always wondered why Alan Cox (or someone
> else?) objected to adding vwrite() ability to write to vmalloc'd addresses
> via /dev/kmem, and this discussion is as good a place as any to ask :)

If it can be done cleanly without races for 2.4 then I don't see a problem.
I also don't see why it is useful, however, which is my only objection: you
can just use the physical address it maps to anyway.
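
(Alan's point: the pages backing a vmalloc'd area are ordinary physical
pages, so in principle one can locate them and poke them through /dev/mem
instead. A rough, untested sketch of that route, assuming the 2.4-era
page-table API and a non-highmem page; the helper name is made up and all
error handling is omitted:

	#include <asm/page.h>
	#include <asm/pgtable.h>
	#include <asm/io.h>

	/* walk the kernel page tables to find the physical address
	   backing a vmalloc'd kernel virtual address */
	static unsigned long vmalloc_to_phys(unsigned long addr)
	{
		pgd_t *pgd = pgd_offset_k(addr);
		pmd_t *pmd = pmd_offset(pgd, addr);
		pte_t *pte = pte_offset(pmd, addr);
		struct page *page = pte_page(*pte);

		/* non-highmem pages have a direct-mapped kernel address */
		return virt_to_phys(page_address(page)) + (addr & ~PAGE_MASK);
	}

Having vwrite() in /dev/kmem saves userspace from redoing that walk for
every page it wants to touch.)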

diff -urN -X dontdiff linux/drivers/char/mem.c vwrite/drivers/char/mem.c
--- linux/drivers/char/mem.c Sat Jun 24 12:44:26 2000
+++ vwrite/drivers/char/mem.c Thu Aug 31 15:22:09 2000
@@ -258,25 +258,27 @@
                 count -= read;
         }
 
-	kbuf = (char *)__get_free_page(GFP_KERNEL);
-	if (!kbuf)
-		return -ENOMEM;
-	while (count > 0) {
-		int len = count;
-
-		if (len > PAGE_SIZE)
-			len = PAGE_SIZE;
-		len = vread(kbuf, (char *)p, len);
-		if (len && copy_to_user(buf, kbuf, len)) {
-			free_page((unsigned long)kbuf);
-			return -EFAULT;
+	if (count > 0) {
+		kbuf = (char *)__get_free_page(GFP_KERNEL);
+		if (!kbuf)
+			return -ENOMEM;
+		while (count > 0) {
+			int len = count;
+
+			if (len > PAGE_SIZE)
+				len = PAGE_SIZE;
+			len = vread(kbuf, (char *)p, len);
+			if (len && copy_to_user(buf, kbuf, len)) {
+				free_page((unsigned long)kbuf);
+				return -EFAULT;
+			}
+			count -= len;
+			buf += len;
+			virtr += len;
+			p += len;
                 }
-		count -= len;
-		buf += len;
-		virtr += len;
-		p += len;
+		free_page((unsigned long)kbuf);
         }
-	free_page((unsigned long)kbuf);
          *ppos = p;
          return virtr + read;
 }
@@ -288,12 +290,56 @@
                           size_t count, loff_t *ppos)
 {
         unsigned long p = *ppos;
+	ssize_t write = 0;
+	ssize_t virtr = 0;
+	char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
+
+	if (p < (unsigned long) high_memory) {
+		write = count;
+		if (count > (unsigned long) high_memory - p)
+			write = (unsigned long) high_memory - p;
 
-	if (p >= (unsigned long) high_memory)
-		return 0;
-	if (count > (unsigned long) high_memory - p)
-		count = (unsigned long) high_memory - p;
-	return do_write_mem(file, (void*)p, p, buf, count, ppos);
+#if defined(__sparc__) || defined(__mc68000__)
+		/* we don't have page 0 mapped on sparc and m68k.. */
+		if (p < PAGE_SIZE && write > 0) {
+			size_t tmp = PAGE_SIZE - p;
+			if (tmp > write) tmp = write;
+			buf += tmp;
+			p += tmp;
+			write -= tmp;
+			count -= tmp;
+		}
+#endif
+		if (copy_from_user((char *)p, buf, write))
+			return -EFAULT;
+		p += write;
+		buf += write;
+		count -= write;
+	}
+
+	if (count > 0) {
+		kbuf = (char *)__get_free_page(GFP_KERNEL);
+		if (!kbuf)
+			return -ENOMEM;
+		while (count > 0) {
+			int len = count;
+
+			if (len > PAGE_SIZE)
+				len = PAGE_SIZE;
+			if (len && copy_from_user(kbuf, buf, len)) {
+				free_page((unsigned long)kbuf);
+				return -EFAULT;
+			}
+			len = vwrite(kbuf, (char *)p, len);
+			count -= len;
+			buf += len;
+			virtr += len;
+			p += len;
+		}
+		free_page((unsigned long)kbuf);
+	}
+	*ppos = p;
+	return virtr + write;
 }
 
 #if !defined(__mc68000__)
diff -urN -X dontdiff linux/include/linux/vmalloc.h vwrite/include/linux/vmalloc.h
--- linux/include/linux/vmalloc.h Tue Jul 11 19:26:51 2000
+++ vwrite/include/linux/vmalloc.h Thu Aug 31 15:22:54 2000
@@ -22,6 +22,7 @@
 extern void vfree(void * addr);
 extern void * __vmalloc (unsigned long size, int gfp_mask, pgprot_t prot);
 extern long vread(char *buf, char *addr, unsigned long count);
+extern long vwrite(char *buf, char *addr, unsigned long count);
 extern void vmfree_area_pages(unsigned long address, unsigned long size);
 extern int vmalloc_area_pages(unsigned long address, unsigned long size,
                               int gfp_mask, pgprot_t prot);
diff -urN -X dontdiff linux/mm/memory.c vwrite/mm/memory.c
--- linux/mm/memory.c Thu Aug 10 06:51:12 2000
+++ vwrite/mm/memory.c Thu Aug 31 15:30:16 2000
@@ -67,7 +67,7 @@
         copy_user_highpage(to, from, address);
 }
 
-mem_map_t * mem_map = NULL;
+mem_map_t * mem_map;
 
 /*
  * Note: this doesn't free the actual pages themselves. That
diff -urN -X dontdiff linux/mm/vmalloc.c vwrite/mm/vmalloc.c
--- linux/mm/vmalloc.c Thu Aug 10 06:51:12 2000
+++ vwrite/mm/vmalloc.c Thu Aug 31 15:21:02 2000
@@ -4,6 +4,7 @@
  * Copyright (C) 1993 Linus Torvalds
  * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
+ * vwrite for write_kmem, Amit S. Kale <akale@veritas.com>, May 2000
  */
 
 #include <linux/malloc.h>
@@ -263,6 +264,43 @@
                         if (count == 0)
                                 goto finished;
                         *buf = *addr;
+			buf++;
+			addr++;
+			count--;
+		} while (--n > 0);
+	}
+finished:
+	read_unlock(&vmlist_lock);
+	return buf - buf_start;
+}
+
+long vwrite(char *buf, char *addr, unsigned long count)
+{
+	struct vm_struct *tmp;
+	char *vaddr, *buf_start = buf;
+	unsigned long n;
+
+	/* Don't allow overflow */
+	if ((unsigned long) addr + count < count)
+		count = -(unsigned long) addr;
+
+	read_lock(&vmlist_lock);
+	for (tmp = vmlist; tmp; tmp = tmp->next) {
+		vaddr = (char *) tmp->addr;
+		if (addr >= vaddr + tmp->size - PAGE_SIZE)
+			continue;
+		while (addr < vaddr) {
+			if (count == 0)
+				goto finished;
+			buf++;
+			addr++;
+			count--;
+		}
+		n = vaddr + tmp->size - PAGE_SIZE - addr;
+		do {
+			if (count == 0)
+				goto finished;
+			*addr = *buf;
                         buf++;
                         addr++;
                         count--;
diff -urN -X dontdiff linux/mm/vmscan.c vwrite/mm/vmscan.c
--- linux/mm/vmscan.c Thu Aug 10 06:51:12 2000
+++ vwrite/mm/vmscan.c Thu Aug 31 15:30:08 2000
@@ -34,11 +34,13 @@
  * using a process that no longer actually exists (it might
  * have died while we slept).
  */
-static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
+static int try_to_swap_out(struct vm_area_struct * vma, unsigned long address,
+			   pte_t * page_table, int gfp_mask)
 {
         pte_t pte;
         swp_entry_t entry;
         struct page * page;
+	struct mm_struct * mm = vma->vm_mm;
         int (*swapout)(struct page *, struct file *);
 
         pte = *page_table;
@@ -79,7 +81,7 @@
                 set_pte(page_table, swp_entry_to_pte(entry));
 drop_pte:
                 UnlockPage(page);
-		vma->vm_mm->rss--;
+		mm->rss--;
                 flush_tlb_page(vma, address);
                 page_cache_release(page);
                 goto out_failed;
@@ -144,9 +146,9 @@
                 struct file *file = vma->vm_file;
                 if (file) get_file(file);
                 pte_clear(page_table);
-		vma->vm_mm->rss--;
+		mm->rss--;
                 flush_tlb_page(vma, address);
-		vmlist_access_unlock(vma->vm_mm);
+		vmlist_access_unlock(mm);
                 error = swapout(page, file);
                 UnlockPage(page);
                 if (file) fput(file);
@@ -175,10 +177,10 @@
         add_to_swap_cache(page, entry);
 
         /* Put the swap entry into the pte after the page is in swapcache */
-	vma->vm_mm->rss--;
+	mm->rss--;
         set_pte(page_table, swp_entry_to_pte(entry));
         flush_tlb_page(vma, address);
-	vmlist_access_unlock(vma->vm_mm);
+	vmlist_access_unlock(mm);
 
         /* OK, do a physical asynchronous write to swap. */
         rw_swap_page(WRITE, page, 0);
@@ -209,10 +211,11 @@
  * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
  */
 
-static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static inline int swap_out_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
 {
         pte_t * pte;
         unsigned long pmd_end;
+	struct mm_struct * mm = vma->vm_mm;
 
         if (pmd_none(*dir))
                 return 0;
@@ -230,8 +233,8 @@
 
         do {
                 int result;
-		vma->vm_mm->swap_address = address + PAGE_SIZE;
-		result = try_to_swap_out(mm, vma, address, pte, gfp_mask);
+		mm->swap_address = address + PAGE_SIZE;
+		result = try_to_swap_out(vma, address, pte, gfp_mask);
                 if (result)
                         return result;
                 if (!mm->swap_cnt)
@@ -242,10 +245,11 @@
         return 0;
 }
 
-static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
 {
         pmd_t * pmd;
         unsigned long pgd_end;
+	struct mm_struct * mm = vma->vm_mm;
 
         if (pgd_none(*dir))
                 return 0;
@@ -262,7 +266,7 @@
                 end = pgd_end;
         
         do {
-		int result = swap_out_pmd(mm, vma, pmd, address, end, gfp_mask);
+		int result = swap_out_pmd(vma, pmd, address, end, gfp_mask);
                 if (result)
                         return result;
                 if (!mm->swap_cnt)
@@ -273,22 +277,23 @@
         return 0;
 }
 
-static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int gfp_mask)
+static int swap_out_vma(struct vm_area_struct * vma, unsigned long address, int gfp_mask)
 {
         pgd_t *pgdir;
         unsigned long end;
+	struct mm_struct * mm = vma->vm_mm;
 
         /* Don't swap out areas which are locked down */
         if (vma->vm_flags & VM_LOCKED)
                 return 0;
 
-	pgdir = pgd_offset(vma->vm_mm, address);
+	pgdir = pgd_offset(mm, address);
 
         end = vma->vm_end;
         if (address >= end)
                 BUG();
         do {
-		int result = swap_out_pgd(mm, vma, pgdir, address, end, gfp_mask);
+		int result = swap_out_pgd(vma, pgdir, address, end, gfp_mask);
                 if (result)
                         return result;
                 if (!mm->swap_cnt)
@@ -320,7 +325,7 @@
                         address = vma->vm_start;
 
                 for (;;) {
-			int result = swap_out_vma(mm, vma, address, gfp_mask);
+			int result = swap_out_vma(vma, address, gfp_mask);
                         if (result)
                                 return result;
                         vma = vma->vm_next;
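
A note on the write_kmem() structure above: user data is bounced through a
freshly allocated kernel page (kbuf) rather than handed to vwrite()
directly, because vwrite() takes the vmlist_lock rwlock and therefore must
not touch user memory that could fault while the lock is held.

For completeness, this is the sort of thing the patch enables from
userspace. Illustrative only: the helper is made up, a real address would
come from somewhere like /proc/ksyms, and on i386 kernel virtual addresses
exceed what a 32-bit off_t can seek to, hence lseek64():

	#define _LARGEFILE64_SOURCE
	#include <sys/types.h>
	#include <fcntl.h>
	#include <unistd.h>

	/* write len bytes of data at kernel virtual address vaddr
	   (now including vmalloc'd ones) via /dev/kmem; needs root */
	static int poke_kmem(unsigned long vaddr, const void *data, size_t len)
	{
		int fd = open("/dev/kmem", O_WRONLY);
		if (fd < 0)
			return -1;
		if (lseek64(fd, (off64_t) vaddr, SEEK_SET) < 0 ||
		    write(fd, data, len) != (ssize_t) len) {
			close(fd);
			return -1;
		}
		return close(fd);
	}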
