[RFC]: madvise(2) and mincore(2) in 2.3.26

Chuck Lever (cel@monkey.org)
Tue, 16 Nov 1999 17:39:19 -0500 (EST)


hi all-

here's a snapshot of madvise(2) and mincore(2), against kernel 2.3.26.
i'd like some comments and review, then i'll port it to a stable release
of 2.3 (probably 2.3.29) for inclusion by Linus.

the meat of the implementation is in mm/filemap.c . the other stuff is
just details, although i'd appreciate review by arch-dependent
maintainers, since i'm not quite certain of the details for alpha, MIPS,
and sparc. i'm generally interested in comments on system call parameter
bounds checking, security implications, if any, and performance
enhancements. and naturally, any bug reports.

i've left out the mmap read-ahead heuristic for NORMAL files, for now.
that still needs some tuning, and i wanted to make the rest of this
available ASAP for testing.

since glibc has a stub that prevents applications from invoking madvise,
i've added a synonym on i386 called madvise1. to invoke madvise1 from
your program, include this in your source:

#include <asm/mman.h>
#include <asm/unistd.h>

_syscall3(int, madvise1, caddr_t, addr, size_t, len, int, advice);

when glibc and the user-land C headers are changed to recognize that
madvise exists and is functional on certain releases of Linux, this can be
replaced with the same invocation as other unices.

Index: arch/alpha/kernel/osf_sys.c
===================================================================
RCS file: /usr/cvsroot/linux-23/arch/alpha/kernel/osf_sys.c,v
retrieving revision 1.1.1.4
diff -u -r1.1.1.4 osf_sys.c
--- arch/alpha/kernel/osf_sys.c 1999/11/04 19:34:10 1.1.1.4
+++ arch/alpha/kernel/osf_sys.c 1999/11/10 20:30:46
@@ -204,15 +204,6 @@
return prio;
}

-
-/*
- * Heh. As documented by DEC..
- */
-asmlinkage unsigned long sys_madvise(void)
-{
- return 0;
-}
-
/*
* No need to acquire the kernel lock, we're local..
*/
Index: arch/i386/kernel/entry.S
===================================================================
RCS file: /usr/cvsroot/linux-23/arch/i386/kernel/entry.S,v
retrieving revision 1.1.1.7
diff -u -r1.1.1.7 entry.S
--- arch/i386/kernel/entry.S 1999/11/04 21:03:49 1.1.1.7
+++ arch/i386/kernel/entry.S 1999/11/10 20:32:10
@@ -592,6 +592,8 @@
.long SYMBOL_NAME(sys_ni_syscall) /* streams2 */
.long SYMBOL_NAME(sys_vfork) /* 190 */
.long SYMBOL_NAME(sys_getrlimit)
+ .long SYMBOL_NAME(sys_madvise)
+ .long SYMBOL_NAME(sys_mincore)

/*
* NOTE!! This doesn't have to be exact - we just have
@@ -599,6 +601,6 @@
* entries. Don't panic if you notice that this hasn't
* been shrunk every time we add a new system call.
*/
- .rept NR_syscalls-191
+ .rept NR_syscalls-193
.long SYMBOL_NAME(sys_ni_syscall)
.endr
Index: arch/m68k/kernel/entry.S
===================================================================
RCS file: /usr/cvsroot/linux-23/arch/m68k/kernel/entry.S,v
retrieving revision 1.1.1.4
diff -u -r1.1.1.4 entry.S
--- arch/m68k/kernel/entry.S 1999/11/04 20:01:55 1.1.1.4
+++ arch/m68k/kernel/entry.S 1999/11/10 20:33:44
@@ -600,6 +600,8 @@
.long SYMBOL_NAME(sys_ni_syscall) /* streams1 */
.long SYMBOL_NAME(sys_ni_syscall) /* streams2 */
.long SYMBOL_NAME(sys_vfork) /* 190 */
+ .long SYMBOL_NAME(sys_madvise)
+ .long SYMBOL_NAME(sys_mincore)

.rept NR_syscalls-(.-SYMBOL_NAME(sys_call_table))/4
.long SYMBOL_NAME(sys_ni_syscall)
Index: arch/mips/kernel/irix5sys.h
===================================================================
RCS file: /usr/cvsroot/linux-23/arch/mips/kernel/irix5sys.h,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 irix5sys.h
--- arch/mips/kernel/irix5sys.h 1999/11/04 19:09:50 1.1.1.2
+++ arch/mips/kernel/irix5sys.h 1999/11/10 20:45:50
@@ -157,7 +157,7 @@
SYS(sys_munmap, 2) /* 1135 munmap() V*/
SYS(sys_mprotect, 3) /* 1136 mprotect() V*/
SYS(sys_msync, 4) /* 1137 msync() V*/
-SYS(irix_madvise, 3) /* 1138 madvise() DC*/
+SYS(sys_madvise, 3) /* 1138 madvise() DC*/
SYS(irix_pagelock, 3) /* 1139 pagelock() IV*/
SYS(irix_getpagesize, 0) /* 1140 getpagesize() V*/
SYS(irix_quotactl, 0) /* 1141 quotactl() V*/
Index: arch/mips/kernel/syscalls.h
===================================================================
RCS file: /usr/cvsroot/linux-23/arch/mips/kernel/syscalls.h,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 syscalls.h
--- arch/mips/kernel/syscalls.h 1999/11/04 19:09:51 1.1.1.2
+++ arch/mips/kernel/syscalls.h 1999/11/10 21:16:16
@@ -223,5 +223,7 @@
SYS(sys_capset, 2) /* 4205 */
SYS(sys_sigaltstack, 2)
SYS(sys_sendfile, 3)
-SYS(sys_ni_syscall, 0)
-SYS(sys_ni_syscall, 0)
+SYS(sys_ni_syscall, 0) /* streams1 */
+SYS(sys_ni_syscall, 0) /* streams2 */
+SYS(sys_madvise, 3) /* 4210 */
+SYS(sys_mincore, 3)
Index: arch/mips/kernel/sysirix.c
===================================================================
RCS file: /usr/cvsroot/linux-23/arch/mips/kernel/sysirix.c,v
retrieving revision 1.1.1.3
diff -u -r1.1.1.3 sysirix.c
--- arch/mips/kernel/sysirix.c 1999/11/04 19:09:51 1.1.1.3
+++ arch/mips/kernel/sysirix.c 1999/11/10 20:41:14
@@ -1136,15 +1136,6 @@
return retval;
}

-asmlinkage int irix_madvise(unsigned long addr, int len, int behavior)
-{
- lock_kernel();
- printk("[%s:%ld] Wheee.. irix_madvise(%08lx,%d,%d)\n",
- current->comm, current->pid, addr, len, behavior);
- unlock_kernel();
- return -EINVAL;
-}
-
asmlinkage int irix_pagelock(char *addr, int len, int op)
{
lock_kernel();
Index: arch/ppc/kernel/misc.S
===================================================================
RCS file: /usr/cvsroot/linux-23/arch/ppc/kernel/misc.S,v
retrieving revision 1.1.1.9
diff -u -r1.1.1.9 misc.S
--- arch/ppc/kernel/misc.S 1999/11/04 20:47:41 1.1.1.9
+++ arch/ppc/kernel/misc.S 1999/11/10 20:49:05
@@ -1039,4 +1039,6 @@
.long sys_ni_syscall /* streams1 */
.long sys_ni_syscall /* streams2 */
.long sys_vfork
- .space (NR_syscalls-183)*4
+ .long sys_madvise /* 190 */
+ .long sys_mincore
+ .space (NR_syscalls-191)*4
Index: arch/sparc/mm/sun4c.c
===================================================================
RCS file: /usr/cvsroot/linux-23/arch/sparc/mm/sun4c.c,v
retrieving revision 1.1.1.5
diff -u -r1.1.1.5 sun4c.c
--- arch/sparc/mm/sun4c.c 1999/11/04 19:54:30 1.1.1.5
+++ arch/sparc/mm/sun4c.c 1999/11/16 19:12:36
@@ -1565,6 +1565,8 @@
sun4c_kstack_vma.vm_end = sun4c_taskstack_end;
sun4c_kstack_vma.vm_page_prot = PAGE_SHARED;
sun4c_kstack_vma.vm_flags = VM_READ | VM_WRITE | VM_EXEC;
+ sun4c_kstack_vma.vm_rd_behavior = MADV_NORMAL;
+ sun4c_kstack_vma.vm_raend = 0;
insert_vm_struct(&init_mm, &sun4c_kstack_vma);
return start_mem;
}
Index: fs/exec.c
===================================================================
RCS file: /usr/cvsroot/linux-23/fs/exec.c,v
retrieving revision 1.1.1.14
diff -u -r1.1.1.14 exec.c
--- fs/exec.c 1999/11/04 21:03:53 1.1.1.14
+++ fs/exec.c 1999/11/16 19:07:15
@@ -37,6 +37,7 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
+#include <asm/mman.h>

#ifdef CONFIG_KMOD
#include <linux/kmod.h>
@@ -299,6 +300,8 @@
mpnt->vm_ops = NULL;
mpnt->vm_pgoff = 0;
mpnt->vm_file = NULL;
+ mpnt->vm_rd_behavior = MADV_NORMAL;
+ mpnt->vm_raend = 0;
mpnt->vm_private_data = (void *) 0;
vmlist_modify_lock(current->mm);
insert_vm_struct(current->mm, mpnt);
Index: include/asm-alpha/mman.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-alpha/mman.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 mman.h
--- include/asm-alpha/mman.h 1998/01/26 00:31:47 1.1.1.1
+++ include/asm-alpha/mman.h 1999/11/10 20:52:18
@@ -31,6 +31,12 @@
#define MCL_CURRENT 8192 /* lock all currently mapped pages */
#define MCL_FUTURE 16384 /* lock all additions to address space */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
Index: include/asm-alpha/unistd.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-alpha/unistd.h,v
retrieving revision 1.1.1.4
diff -u -r1.1.1.4 unistd.h
--- include/asm-alpha/unistd.h 1999/11/04 19:54:53 1.1.1.4
+++ include/asm-alpha/unistd.h 1999/11/10 21:09:47
@@ -79,7 +79,7 @@
#define __NR_madvise 75
#define __NR_vhangup 76
#define __NR_osf_kmodcall 77 /* not implemented */
-#define __NR_osf_mincore 78 /* not implemented */
+#define __NR_mincore 78
#define __NR_getgroups 79
#define __NR_setgroups 80
#define __NR_osf_old_getpgrp 81 /* not implemented */
Index: include/asm-arm/mman.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-arm/mman.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 mman.h
--- include/asm-arm/mman.h 1998/01/21 00:39:42 1.1.1.1
+++ include/asm-arm/mman.h 1999/11/10 20:53:10
@@ -25,6 +25,12 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
Index: include/asm-arm/unistd.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-arm/unistd.h,v
retrieving revision 1.1.1.5
diff -u -r1.1.1.5 unistd.h
--- include/asm-arm/unistd.h 1999/11/04 20:47:59 1.1.1.5
+++ include/asm-arm/unistd.h 1999/11/10 21:10:28
@@ -198,6 +198,8 @@
/* 188 reserved */
/* 189 reserved */
#define __NR_vfork (__NR_SYSCALL_BASE+190)
+#define __NR_madvise (__NR_SYSCALL_BASE+191)
+#define __NR_mincore (__NR_SYSCALL_BASE+192)

#define __sys2(x) #x
#define __sys1(x) __sys2(x)
Index: include/asm-i386/mman.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-i386/mman.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 mman.h
--- include/asm-i386/mman.h 1996/10/07 05:55:48 1.1.1.1
+++ include/asm-i386/mman.h 1999/11/10 20:53:30
@@ -25,6 +25,12 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
Index: include/asm-i386/unistd.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-i386/unistd.h,v
retrieving revision 1.1.1.4
diff -u -r1.1.1.4 unistd.h
--- include/asm-i386/unistd.h 1999/11/04 21:03:56 1.1.1.4
+++ include/asm-i386/unistd.h 1999/11/16 22:06:23
@@ -196,6 +196,9 @@
#define __NR_putpmsg 189 /* some people actually want streams */
#define __NR_vfork 190
#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
+#define __NR_madvise 192
+#define __NR_madvise1 192 /* remove this, once glibc has real madvise */
+#define __NR_mincore 193

/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */

Index: include/asm-m68k/mman.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-m68k/mman.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 mman.h
--- include/asm-m68k/mman.h 1996/11/22 13:56:36 1.1.1.1
+++ include/asm-m68k/mman.h 1999/11/10 20:53:38
@@ -25,6 +25,12 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
Index: include/asm-m68k/unistd.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-m68k/unistd.h,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 unistd.h
--- include/asm-m68k/unistd.h 1999/11/04 19:34:31 1.1.1.2
+++ include/asm-m68k/unistd.h 1999/11/10 21:12:23
@@ -194,6 +194,8 @@
#define __NR_getpmsg 188 /* some people actually want streams */
#define __NR_putpmsg 189 /* some people actually want streams */
#define __NR_vfork 190
+#define __NR_madvise 191
+#define __NR_mincore 192

/* user-visible error numbers are in the range -1 - -122: see
<asm-m68k/errno.h> */
Index: include/asm-mips/mman.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-mips/mman.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 mman.h
--- include/asm-mips/mman.h 1997/06/26 19:33:40 1.1.1.1
+++ include/asm-mips/mman.h 1999/11/10 20:54:07
@@ -56,6 +56,15 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */

+/*
+ * Flags for madvise
+ */
+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
Index: include/asm-mips/unistd.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-mips/unistd.h,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 unistd.h
--- include/asm-mips/unistd.h 1999/11/04 19:10:00 1.1.1.2
+++ include/asm-mips/unistd.h 1999/11/10 21:17:07
@@ -1196,11 +1196,13 @@
#define __NR_sendfile (__NR_Linux + 207)
#define __NR_getpmsg (__NR_Linux + 208)
#define __NR_putpmsg (__NR_Linux + 209)
+#define __NR_madvise (__NR_Linux + 210)
+#define __NR_mincore (__NR_Linux + 211)

/*
* Offset of the last Linux flavoured syscall
*/
-#define __NR_Linux_syscalls 209
+#define __NR_Linux_syscalls 211

#ifndef _LANGUAGE_ASSEMBLY

Index: include/asm-ppc/mman.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-ppc/mman.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 mman.h
--- include/asm-ppc/mman.h 1996/12/18 08:54:09 1.1.1.1
+++ include/asm-ppc/mman.h 1999/11/10 20:54:20
@@ -25,6 +25,12 @@
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
Index: include/asm-ppc/unistd.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-ppc/unistd.h,v
retrieving revision 1.1.1.3
diff -u -r1.1.1.3 unistd.h
--- include/asm-ppc/unistd.h 1999/11/04 19:54:55 1.1.1.3
+++ include/asm-ppc/unistd.h 1999/11/10 21:17:50
@@ -194,6 +194,8 @@
#define __NR_getpmsg 187 /* some people actually want streams */
#define __NR_putpmsg 188 /* some people actually want streams */
#define __NR_vfork 189
+#define __NR_madvise 190
+#define __NR_mincore 191

#define __NR(n) #n

Index: include/asm-sh/mman.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-sh/mman.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 mman.h
--- include/asm-sh/mman.h 1999/11/04 19:54:55 1.1.1.1
+++ include/asm-sh/mman.h 1999/11/10 20:54:29
@@ -25,6 +25,12 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
Index: include/asm-sh/unistd.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-sh/unistd.h,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 unistd.h
--- include/asm-sh/unistd.h 1999/11/04 20:48:00 1.1.1.2
+++ include/asm-sh/unistd.h 1999/11/10 21:18:15
@@ -200,6 +200,8 @@
#define __NR_streams1 188 /* some people actually want it */
#define __NR_streams2 189 /* some people actually want it */
#define __NR_vfork 190
+#define __NR_madvise 191
+#define __NR_mincore 192

/* user-visible error numbers are in the range -1 - -125: see <asm-sh/errno.h> */

Index: include/asm-sparc/mman.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-sparc/mman.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 mman.h
--- include/asm-sparc/mman.h 1996/11/09 08:29:41 1.1.1.1
+++ include/asm-sparc/mman.h 1999/11/10 20:54:57
@@ -31,6 +31,12 @@
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* XXX Need to add flags to SunOS's mctl, mlockall, and madvise system
* XXX calls.
*/
Index: include/asm-sparc/unistd.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-sparc/unistd.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 unistd.h
--- include/asm-sparc/unistd.h 1999/04/23 02:24:52 1.1.1.1
+++ include/asm-sparc/unistd.h 1999/11/10 21:18:52
@@ -90,10 +90,10 @@
/* #define __NR_vadvise 72 SunOS Specific */
#define __NR_munmap 73 /* Common */
#define __NR_mprotect 74 /* Common */
-/* #define __NR_madvise 75 SunOS Specific */
+#define __NR_madvise 75 /* SunOS Specific */
#define __NR_vhangup 76 /* Common */
/* #define __NR_ni_syscall 77 ENOSYS under SunOS */
-/* #define __NR_mincore 78 SunOS Specific */
+#define __NR_mincore 78 /* SunOS Specific */
#define __NR_getgroups 79 /* Common */
#define __NR_setgroups 80 /* Common */
#define __NR_getpgrp 81 /* Common */
Index: include/asm-sparc64/mman.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-sparc64/mman.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 mman.h
--- include/asm-sparc64/mman.h 1996/12/13 09:37:47 1.1.1.1
+++ include/asm-sparc64/mman.h 1999/11/10 20:55:26
@@ -28,6 +28,12 @@
#define MS_INVALIDATE 2 /* invalidate the caches */
#define MS_SYNC 4 /* synchronous memory sync */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */

Index: include/asm-sparc64/unistd.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/asm-sparc64/unistd.h,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 unistd.h
--- include/asm-sparc64/unistd.h 1999/11/04 19:34:33 1.1.1.2
+++ include/asm-sparc64/unistd.h 1999/11/10 21:19:25
@@ -90,10 +90,10 @@
/* #define __NR_vadvise 72 SunOS Specific */
#define __NR_munmap 73 /* Common */
#define __NR_mprotect 74 /* Common */
-/* #define __NR_madvise 75 SunOS Specific */
+#define __NR_madvise 75 /* SunOS Specific */
#define __NR_vhangup 76 /* Common */
/* #define __NR_ni_syscall 77 ENOSYS under SunOS */
-/* #define __NR_mincore 78 SunOS Specific */
+#define __NR_mincore 78 /* SunOS Specific */
#define __NR_getgroups 79 /* Common */
#define __NR_setgroups 80 /* Common */
#define __NR_getpgrp 81 /* Common */
Index: include/linux/mm.h
===================================================================
RCS file: /usr/cvsroot/linux-23/include/linux/mm.h,v
retrieving revision 1.1.1.15
diff -u -r1.1.1.15 mm.h
--- include/linux/mm.h 1999/11/08 16:52:37 1.1.1.15
+++ include/linux/mm.h 1999/11/16 18:13:07
@@ -58,6 +58,8 @@
struct vm_operations_struct * vm_ops;
unsigned long vm_pgoff; /* offset in PAGE_SIZE units, *not* PAGE_CACHE_SIZE */
struct file * vm_file;
+ unsigned vm_rd_behavior; /* madvise hint */
+ unsigned long vm_raend; /* read-ahead context */
void * vm_private_data; /* was vm_pte (shared mem) */
};

Index: ipc/shm.c
===================================================================
RCS file: /usr/cvsroot/linux-23/ipc/shm.c,v
retrieving revision 1.1.1.16
diff -u -r1.1.1.16 shm.c
--- ipc/shm.c 1999/11/08 16:52:37 1.1.1.16
+++ ipc/shm.c 1999/11/16 19:07:29
@@ -26,6 +26,7 @@

#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/mman.h>

#include "util.h"

@@ -658,6 +659,8 @@
| VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
| ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
shmd->vm_file = NULL;
+ shmd->vm_rd_behavior = MADV_NORMAL;
+ shmd->vm_raend = 0;
shmd->vm_pgoff = 0;
shmd->vm_ops = &shm_vm_ops;

Index: kernel/fork.c
===================================================================
RCS file: /usr/cvsroot/linux-23/kernel/fork.c,v
retrieving revision 1.1.1.12
diff -u -r1.1.1.12 fork.c
--- kernel/fork.c 1999/11/04 20:55:57 1.1.1.12
+++ kernel/fork.c 1999/11/16 19:07:42
@@ -21,6 +21,7 @@
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
+#include <asm/mman.h>

/* The idle threads do not count.. */
int nr_threads=0;
@@ -249,6 +250,8 @@
tmp->vm_mm = mm;
mm->map_count++;
tmp->vm_next = NULL;
+ tmp->vm_rd_behavior = MADV_NORMAL;
+ tmp->vm_raend = 0;
file = tmp->vm_file;
if (file) {
get_file(file);
Index: mm/filemap.c
===================================================================
RCS file: /usr/cvsroot/linux-23/mm/filemap.c,v
retrieving revision 1.1.1.20
diff -u -r1.1.1.20 filemap.c
--- mm/filemap.c 1999/11/08 16:52:37 1.1.1.20
+++ mm/filemap.c 1999/11/16 22:04:51
@@ -20,6 +20,7 @@
#include <linux/file.h>
#include <linux/swapctl.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/highmem.h>

@@ -36,6 +37,8 @@
* page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
*
* SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
+ *
+ * madvise(2) and mincore(2), Chuck Lever <cel@monkey.org>
*/

atomic_t page_cache_size = ATOMIC_INIT(0);
@@ -52,6 +55,9 @@
#define CLUSTER_PAGES (1 << page_cluster)
#define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)

+#define filesize_in_pages(f) /* file size, rounded up to whole pages */ \
+ (((f)->f_dentry->d_inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
+
void __add_page_to_hash_queue(struct page * page, struct page **p)
{
atomic_inc(&page_cache_size);
@@ -555,7 +561,7 @@
static int read_cluster_nonblocking(struct file * file, unsigned long offset)
{
int error = 0;
- unsigned long filesize = (file->f_dentry->d_inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ unsigned long filesize = filesize_in_pages(file);
unsigned long pages = CLUSTER_PAGES;

offset = CLUSTER_OFFSET(offset);
@@ -705,6 +711,453 @@
return page;
}

+/*
+ * The madvise(2) system call.
+ *
+ * Applications can use madvise() to advise the kernel how it should
+ * handle paging I/O in this VM area. The idea is to help the kernel
+ * use appropriate read-ahead and caching techniques. The information
+ * provided is advisory only, and can be safely disregarded by the
+ * kernel without affecting the correct operation of the application.
+ *
+ * behavior values:
+ * MADV_NORMAL - the default behavior is to read clusters. This
+ * results in some read-ahead and read-behind.
+ * MADV_RANDOM - the system should read the minimum amount of data
+ * on any access, since it is unlikely that the appli-
+ * cation will need more than what it asks for.
+ * MADV_SEQUENTIAL - pages in the given range will probably be
+ * accessed once, so they can be aggressively read ahead,
+ * and can be freed soon after they are accessed.
+ * MADV_WILLNEED - the application is notifying the system to read
+ * some pages ahead.
+ * MADV_DONTNEED - the application is finished with the given range,
+ * so the kernel can free resources associated with it.
+ *
+ * return values:
+ * zero = success
+ * -1 = some error occurred, errno value set (see below).
+ *
+ * errno values:
+ * EINVAL - start + len wraps around, or start is not page-aligned,
+ * or behavior is not a valid value.
+ * ENOMEM - addresses in the specified range are not currently
+ * mapped, or are outside the AS of the process.
+ * EAGAIN - a vm area could not be allocated; EIO - paging I/O error.
+ */
+
+static long madvise_fixup_start(struct vm_area_struct * vma,
+ unsigned long end, int behavior)
+{
+ struct vm_area_struct * n;
+ /* Split vma at "end": new area n takes the head with the new behavior. */
+ n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!n)
+ return -EAGAIN; /* no memory for the new area */
+ *n = *vma; /* start from a copy of the original area */
+ n->vm_end = end; /* n covers [vma->vm_start, end) */
+ n->vm_flags = vma->vm_flags;
+ n->vm_rd_behavior = behavior;
+ n->vm_raend = 0; /* zero: no read-ahead done in n yet */
+ if (n->vm_file)
+ get_file(n->vm_file); /* n takes its own reference on the file */
+ if (n->vm_ops && n->vm_ops->open)
+ n->vm_ops->open(n);
+ vmlist_modify_lock(vma->vm_mm);
+ vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT; /* keep the file offset in step */
+ vma->vm_start = end; /* vma shrinks to [end, vma->vm_end), old behavior */
+ insert_vm_struct(current->mm, n);
+ vmlist_modify_unlock(vma->vm_mm);
+ return 0;
+}
+
+static long madvise_fixup_end(struct vm_area_struct * vma,
+ unsigned long start, int behavior)
+{
+ struct vm_area_struct * n;
+ /* Split vma at "start": new area n takes the tail with the new behavior. */
+ n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!n)
+ return -EAGAIN; /* no memory for the new area */
+ *n = *vma; /* start from a copy of the original area */
+ n->vm_start = start; /* n covers [start, vma->vm_end) */
+ n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT; /* keep the file offset in step */
+ n->vm_flags = vma->vm_flags;
+ n->vm_rd_behavior = behavior;
+ n->vm_raend = 0; /* zero: no read-ahead done in n yet */
+ if (n->vm_file)
+ get_file(n->vm_file); /* n takes its own reference on the file */
+ if (n->vm_ops && n->vm_ops->open)
+ n->vm_ops->open(n);
+ vmlist_modify_lock(vma->vm_mm);
+ vma->vm_end = start; /* vma shrinks to [vma->vm_start, start), old behavior */
+ insert_vm_struct(current->mm, n);
+ vmlist_modify_unlock(vma->vm_mm);
+ return 0;
+}
+
+static long madvise_fixup_middle(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end, int behavior)
+{
+ struct vm_area_struct * left, * right;
+ /* Split vma in three: vma itself keeps [start, end) with the new behavior. */
+ left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!left)
+ return -EAGAIN;
+ right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!right) {
+ kmem_cache_free(vm_area_cachep, left); /* don't leak the first allocation */
+ return -EAGAIN;
+ }
+ *left = *vma;
+ *right = *vma;
+ left->vm_end = start; /* left covers [vma->vm_start, start) */
+ right->vm_start = end; /* right covers [end, vma->vm_end) */
+ right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
+ left->vm_flags = vma->vm_flags;
+ right->vm_flags = vma->vm_flags;
+ left->vm_rd_behavior = vma->vm_rd_behavior; /* both ends keep the old behavior */
+ left->vm_raend = 0;
+ right->vm_rd_behavior = vma->vm_rd_behavior;
+ right->vm_raend = 0;
+ if (vma->vm_file)
+ atomic_add(2, &vma->vm_file->f_count); /* one file reference per new area */
+
+ if (vma->vm_ops && vma->vm_ops->open) {
+ vma->vm_ops->open(left);
+ vma->vm_ops->open(right);
+ }
+ vmlist_modify_lock(vma->vm_mm);
+ vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT; /* must precede the vm_start update */
+ vma->vm_start = start;
+ vma->vm_end = end;
+ vma->vm_rd_behavior = behavior;
+ vma->vm_raend = 0;
+ insert_vm_struct(current->mm, left);
+ insert_vm_struct(current->mm, right);
+ vmlist_modify_unlock(vma->vm_mm);
+ return 0;
+}
+
+/*
+ * Record a new read-ahead hint for [start, end) of one vm area. When the
+ * range covers only part of the area, the area is split so that each
+ * piece keeps its own hint. Returns 0 or the error from the split.
+ */
+static long madvise_behavior(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end, int behavior)
+{
+ int covers_head = (start == vma->vm_start);
+ int covers_tail = (end == vma->vm_end);
+
+ /* the area already carries this hint: nothing to change */
+ if (vma->vm_rd_behavior == behavior)
+ return 0;
+
+ if (!covers_head && !covers_tail)
+ return madvise_fixup_middle(vma, start, end, behavior);
+ if (!covers_head)
+ return madvise_fixup_end(vma, start, behavior);
+ if (!covers_tail)
+ return madvise_fixup_start(vma, end, behavior);
+
+ /* the range is the whole area: retag it in place */
+ vma->vm_rd_behavior = behavior;
+ vma->vm_raend = 0;
+ return 0;
+}
+
+/*
+ * This schedules all required I/O operations, then runs the disk queue
+ * to make sure they are started. It does not wait for completion.
+ */
+static long madvise_willneed(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end)
+{
+ unsigned long filesize; /* in pages; only valid once vm_file is known non-NULL */
+
+ /* do nothing if this is an anonymous area: vm_file must not be touched */
+ if (!vma->vm_file || !vma->vm_ops || !vma->vm_ops->nopage)
+ return 0;
+ filesize = filesize_in_pages(vma->vm_file);
+ start = ((start - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+
+ if (end > vma->vm_end)
+ end = vma->vm_end;
+ end = ((end - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+
+ while ((start < end) && (start < filesize)) { /* never read past end of file */
+ int error = page_cache_read(vma->vm_file, start);
+ start++;
+ if (error < 0)
+ return error; /* negative error from page_cache_read() */
+ }
+
+ run_task_queue(&tq_disk);
+ return 0;
+}
+
+/*
+ * Application no longer needs these pages. If there is dirty data,
+ * it's OK to just throw it away. The app will be more careful about
+ * data it wants to keep. Be sure to free swap resources too.
+ *
+ * This is a simple-minded "lazy" implementation that just signals
+ * shrink_mmap to do the work later when the system needs more pages.
+ */
+static long madvise_dontneed(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end)
+{
+ struct inode * inode;
+ struct page * page, **hash;
+
+ /* anonymous areas have no vm_file, hence no page cache pages to age */
+ if (!vma->vm_file)
+ return 0;
+ inode = vma->vm_file->f_dentry->d_inode;
+ start = ((start - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+ end = ((end - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+
+ while (start < end) {
+ hash = page_hash(&inode->i_data, start);
+ spin_lock(&pagecache_lock);
+ page = __find_page_nolock(&inode->i_data, start, *hash);
+ if (page)
+ clear_bit(PG_referenced, &page->flags);
+ spin_unlock(&pagecache_lock);
+ start++;
+ }
+ return 0;
+}
+
+static long madvise_area(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end, int behavior)
+{
+ /*
+ * Apply one advice value to the part [start, end) of a single
+ * vm area.
+ *
+ * MADV_WILLNEED and MADV_DONTNEED each have their own helper;
+ * the remaining valid values only record a read-ahead hint in
+ * the area. Anything else is rejected with -EINVAL.
+ */
+
+ if (behavior == MADV_WILLNEED)
+ return madvise_willneed(vma, start, end);
+
+ if (behavior == MADV_DONTNEED)
+ return madvise_dontneed(vma, start, end);
+
+ if (behavior == MADV_NORMAL ||
+ behavior == MADV_SEQUENTIAL ||
+ behavior == MADV_RANDOM)
+ return madvise_behavior(vma, start, end, behavior);
+
+ /* not an advice value we know about */
+ return -EINVAL;
+}
+
+asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior)
+{
+ unsigned long end;
+ struct vm_area_struct * vma;
+ int unmapped_error = 0; /* becomes -ENOMEM once a hole is seen */
+ int error = -EINVAL;
+ /* mmap_sem is held across the whole walk of the vm area list */
+ down(&current->mm->mmap_sem);
+
+ if (start & ~PAGE_MASK) /* start must be page-aligned */
+ goto out;
+ len = (len + ~PAGE_MASK) & PAGE_MASK; /* round len up to whole pages */
+ end = start + len;
+ if (end < start) /* start + len wrapped around */
+ goto out;
+
+ error = 0;
+ if (end == start) /* empty range: succeed without doing anything */
+ goto out;
+
+ /*
+ * If the interval [start,end) covers some unmapped address
+ * ranges, just ignore them, but return -ENOMEM at the end.
+ */
+ vma = find_vma(current->mm, start);
+ for (;;) {
+ /* Still start < end. */
+ error = -ENOMEM;
+ if (!vma)
+ goto out;
+
+ /* Here start < vma->vm_end. */
+ if (start < vma->vm_start) {
+ unmapped_error = -ENOMEM;
+ start = vma->vm_start;
+ }
+
+ /* Here vma->vm_start <= start < vma->vm_end. */
+ if (end <= vma->vm_end) {
+ if (start < end) {
+ error = madvise_area(vma, start, end,
+ behavior);
+ if (error)
+ goto out;
+ }
+ error = unmapped_error;
+ goto out;
+ }
+
+ /* Here vma->vm_start <= start < vma->vm_end < end. */
+ error = madvise_area(vma, start, vma->vm_end, behavior);
+ if (error)
+ goto out;
+ /* NOTE(review): madvise_area() may have split vma; confirm vm_end/vm_next still advance the walk */
+ start = vma->vm_end;
+ vma = vma->vm_next;
+ }
+out:
+ up(&current->mm->mmap_sem);
+ return error;
+}
+
+/*
+ * The mincore(2) system call.
+ *
+ * mincore() returns the memory residency status of the pages in the
+ * current process's address space specified by [addr, addr + len).
+ * The status is returned in a vector of bytes. The least significant
+ * bit of each byte is 1 if the referenced page is in memory, otherwise
+ * it is zero.
+ *
+ * Because the status of a page can change after mincore() checks it
+ * but before it returns to the application, the returned vector may
+ * contain stale information. Only locked pages are guaranteed to
+ * remain in memory.
+ *
+ * return values:
+ * zero = success
+ * -1 = some error occurred, errno value set (see below).
+ *
+ * errno values:
+ * EFAULT - vec points to an illegal address
+ * EINVAL - addr is not page-aligned, or addr + len wraps
+ * around (a zero len simply returns success)
+ * ENOMEM - Addresses in the range [addr, addr + len) are
+ * invalid for the address space of this process, or
+ * specify one or more pages which are not currently
+ * mapped
+ */
+
+/*
+ * This predicate returns 1 if the page is "in core," otherwise 0.
+ * Later we can get more picky about what "in core" means precisely,
+ * but for now, it simply checks to see if the page is in the page
+ * cache. Areas with no backing file (NULL vm_file) always report 0.
+ */
+static inline char mincore_page_is_present(struct vm_area_struct * vma,
+ unsigned long pgoff)
+{
+ struct inode * inode;
+ struct page * page, ** hash;
+
+ if (!vma->vm_file)
+ return 0;
+ inode = vma->vm_file->f_dentry->d_inode;
+ hash = page_hash(&inode->i_data, pgoff);
+ spin_lock(&pagecache_lock);
+ page = __find_page_nolock(&inode->i_data, pgoff, *hash);
+ spin_unlock(&pagecache_lock);
+ return page ? 1 : 0;
+}
+
+static long mincore_area(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end, char * vec)
+{
+ int error, size, i = 0;
+ char * tmp;
+ /* Fill vec with one status byte per page of [start, end) within vma. */
+ start = ((start - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+ if (end > vma->vm_end)
+ end = vma->vm_end;
+ end = ((end - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+
+ /* # of bytes in "vec" = # of pages */
+ size = end - start;
+ tmp = (char *) vmalloc(size); /* kernel-side staging buffer */
+ if (!tmp)
+ return -ENOMEM;
+
+ while (start < end)
+ tmp[i++] = mincore_page_is_present(vma, start++);
+ /* NOTE(review): caller holds mmap_sem here; confirm a fault in copy_to_user() cannot deadlock */
+ error = copy_to_user(vec, tmp, size) ? -EFAULT : 0;
+ vfree(tmp);
+ return error;
+}
+
+asmlinkage long sys_mincore(unsigned long start, size_t len, char *vec)
+{
+ unsigned long index = 0; /* vec[index] describes the page at start */
+ unsigned long end;
+ struct vm_area_struct * vma;
+ int unmapped_error = 0; /* becomes -ENOMEM once a hole is seen */
+ int error = -EINVAL;
+
+ down(&current->mm->mmap_sem);
+
+ if (start & ~PAGE_MASK) /* start must be page-aligned */
+ goto out;
+ len = (len + ~PAGE_MASK) & PAGE_MASK; /* round len up to whole pages */
+ end = start + len;
+ if (end < start) /* start + len wrapped around */
+ goto out;
+
+ error = 0;
+ if (end == start) /* empty range: succeed, vec untouched */
+ goto out;
+
+ /*
+ * If the interval [start,end) covers some unmapped address
+ * ranges, just ignore them, but return -ENOMEM at the end.
+ */
+ vma = find_vma(current->mm, start);
+ for (;;) {
+ /* Still start < end. */
+ error = -ENOMEM;
+ if (!vma)
+ goto out;
+
+ /* Here start < vma->vm_end. */
+ if (start < vma->vm_start) {
+ unmapped_error = -ENOMEM;
+ index += (vma->vm_start - start) >> PAGE_CACHE_SHIFT; /* skip vec bytes of the hole */
+ start = vma->vm_start;
+ }
+
+ /* Here vma->vm_start <= start < vma->vm_end. */
+ if (end <= vma->vm_end) {
+ if (start < end) {
+ error = mincore_area(vma, start, end, &vec[index]);
+ if (error)
+ goto out;
+ }
+ error = unmapped_error;
+ goto out;
+ }
+
+ /* Here vma->vm_start <= start < vma->vm_end < end. */
+ error = mincore_area(vma, start, vma->vm_end, &vec[index]);
+ if (error)
+ goto out;
+ index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT;
+ start = vma->vm_end;
+ vma = vma->vm_next;
+ }
+
+out:
+ up(&current->mm->mmap_sem);
+ return error;
+}
+
#if 0
#define PROFILE_READAHEAD
#define DEBUG_READAHEAD
@@ -1292,6 +1745,63 @@
}

/*
+ * Read ahead for MADV_NORMAL areas.
+ * This is a no-op for now, but later we'll add a sophisticated
+ * read-ahead heuristic in here.
+ */
+static inline void nopage_normal_readahead(struct vm_area_struct * vma,
+ unsigned long pgoff) /* neither argument is used yet */
+{
+ return; /* placeholder body: no read-ahead is issued for MADV_NORMAL */
+}
+
+/*
+ * Read ahead and flush behind for MADV_SEQUENTIAL areas (NOTE(review): no
+ * flush-behind step is apparent below). NB: Always use the largest window.
+ */
+static void nopage_sequential_readahead(struct vm_area_struct * vma,
+ unsigned long pgoff) /* pgoff: page offset being faulted in */
+{
+ struct file * file = vma->vm_file;
+ unsigned long ra_window;
+
+ /*
+ * Window size: get_max_readahead() for the inode, as a multiple of the cluster size.
+ */
+ ra_window = get_max_readahead(file->f_dentry->d_inode);
+ ra_window = CLUSTER_OFFSET(ra_window + CLUSTER_PAGES - 1);
+
+ /*
+ * vm_raend is zero if we haven't read ahead in this area yet; seed it from pgoff.
+ */
+ if (vma->vm_raend == 0)
+ vma->vm_raend = CLUSTER_OFFSET(pgoff) + ra_window +
+ CLUSTER_PAGES;
+
+ /*
+ * If we've just faulted the page half-way through our window
+ * (exact match on pgoff only), then schedule reads for the next window.
+ */
+ if ((pgoff + (ra_window >> 1)) == vma->vm_raend) {
+ unsigned count = ra_window; /* pages still to request */
+ unsigned long offset = vma->vm_raend; /* next page to request */
+ unsigned long filesize = filesize_in_pages(file); /* upper bound for offset */
+ /* Stop at the end of the file or on the first failed page_cache_read(). */
+ while (count && (offset < filesize)) {
+ if (page_cache_read(file, offset) < 0)
+ break;
+ offset++;
+ count--;
+ }
+ run_task_queue(&tq_disk); /* presumably starts the queued disk I/O - confirm */
+ /* Advance the window end by the number of pages actually requested. */
+ vma->vm_raend += ra_window - count;
+ }
+
+ return;
+}
+
+/*
* filemap_nopage() is invoked via the vma operations vector for a
* mapped memory region to read in file data during a page fault.
*
@@ -1307,7 +1817,7 @@
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
struct page *page, **hash, *old_page;
- unsigned long size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ unsigned long size = filesize_in_pages(file);

unsigned long pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;

@@ -1338,6 +1848,18 @@
goto page_not_uptodate;

success:
+ switch (area->vm_rd_behavior) {
+ case MADV_NORMAL:
+ nopage_normal_readahead(area, pgoff);
+ break;
+ case MADV_SEQUENTIAL:
+ nopage_sequential_readahead(area, pgoff);
+ break;
+ case MADV_RANDOM:
+ default:
+ break;
+ }
+
/*
* Found the page and have a reference on it, need to check sharing
* and possibly copy it over to another page..
@@ -1368,7 +1890,7 @@
* Otherwise, we're off the end of a privately mapped file,
* so we need to map a zero page.
*/
- if (pgoff < size)
+ if ((pgoff < size) && (area->vm_rd_behavior != MADV_RANDOM))
error = read_cluster_nonblocking(file, pgoff);
else
error = page_cache_read(file, pgoff);
Index: mm/mlock.c
===================================================================
RCS file: /usr/cvsroot/linux-23/mm/mlock.c,v
retrieving revision 1.1.1.8
diff -u -r1.1.1.8 mlock.c
--- mm/mlock.c 1999/11/04 21:04:06 1.1.1.8
+++ mm/mlock.c 1999/11/16 19:39:30
@@ -31,6 +31,8 @@
*n = *vma;
n->vm_end = end;
n->vm_flags = newflags;
+ n->vm_rd_behavior = vma->vm_rd_behavior;
+ n->vm_raend = 0;
if (n->vm_file)
get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
@@ -55,6 +57,8 @@
n->vm_start = start;
n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT;
n->vm_flags = newflags;
+ n->vm_rd_behavior = vma->vm_rd_behavior;
+ n->vm_raend = 0;
if (n->vm_file)
get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
@@ -85,6 +89,10 @@
right->vm_start = end;
right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
vma->vm_flags = newflags;
+ left->vm_rd_behavior = vma->vm_rd_behavior;
+ left->vm_raend = 0;
+ right->vm_rd_behavior = vma->vm_rd_behavior;
+ right->vm_raend = 0;
if (vma->vm_file)
atomic_add(2, &vma->vm_file->f_count);

@@ -96,6 +104,7 @@
vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
vma->vm_start = start;
vma->vm_end = end;
+ vma->vm_raend = 0;
vma->vm_flags = newflags;
insert_vm_struct(current->mm, left);
insert_vm_struct(current->mm, right);
Index: mm/mmap.c
===================================================================
RCS file: /usr/cvsroot/linux-23/mm/mmap.c,v
retrieving revision 1.1.1.13
diff -u -r1.1.1.13 mmap.c
--- mm/mmap.c 1999/11/04 21:04:06 1.1.1.13
+++ mm/mmap.c 1999/11/16 19:11:04
@@ -15,6 +15,7 @@

#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/mman.h>

/* description of effects of mapping type and prot in current implementation.
* this is due to the limited x86 page protection hardware. The expected
@@ -251,6 +252,8 @@
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;
+ vma->vm_rd_behavior = MADV_NORMAL;
+ vma->vm_raend = 0;

if (file) {
if (file->f_mode & 1)
@@ -549,6 +552,8 @@
mpnt->vm_end = area->vm_end;
mpnt->vm_page_prot = area->vm_page_prot;
mpnt->vm_flags = area->vm_flags;
+ mpnt->vm_rd_behavior = area->vm_rd_behavior;
+ mpnt->vm_raend = 0;
mpnt->vm_ops = area->vm_ops;
mpnt->vm_pgoff = area->vm_pgoff;
area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
@@ -788,6 +793,8 @@
vma->vm_ops = NULL;
vma->vm_pgoff = 0;
vma->vm_file = NULL;
+ vma->vm_rd_behavior = MADV_NORMAL; /* doesn't matter for anon map */
+ vma->vm_raend = 0;
vma->vm_private_data = NULL;

/*
@@ -951,6 +958,14 @@
if (off != mpnt->vm_pgoff)
continue;
}
+
+ /*
+ * Areas that share a backing file may only be merged
+ * if their page-in (read-ahead) behavior also matches.
+ */
+ if ((mpnt->vm_file == prev->vm_file) &&
+ (mpnt->vm_rd_behavior != prev->vm_rd_behavior))
+ continue;

/* merge prev with mpnt and set up pointers so the new
* big segment can possibly merge with the next one.
Index: mm/mprotect.c
===================================================================
RCS file: /usr/cvsroot/linux-23/mm/mprotect.c,v
retrieving revision 1.1.1.6
diff -u -r1.1.1.6 mprotect.c
--- mm/mprotect.c 1999/11/04 21:04:06 1.1.1.6
+++ mm/mprotect.c 1999/11/16 19:12:05
@@ -106,6 +106,8 @@
n->vm_end = end;
n->vm_flags = newflags;
n->vm_page_prot = prot;
+ n->vm_rd_behavior = vma->vm_rd_behavior;
+ n->vm_raend = 0;
if (n->vm_file)
get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
@@ -132,6 +134,8 @@
n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT;
n->vm_flags = newflags;
n->vm_page_prot = prot;
+ n->vm_rd_behavior = vma->vm_rd_behavior;
+ n->vm_raend = 0;
if (n->vm_file)
get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
@@ -162,6 +166,10 @@
left->vm_end = start;
right->vm_start = end;
right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
+ left->vm_rd_behavior = vma->vm_rd_behavior;
+ left->vm_raend = 0;
+ right->vm_rd_behavior = vma->vm_rd_behavior;
+ right->vm_raend = 0;
if (vma->vm_file)
atomic_add(2,&vma->vm_file->f_count);
if (vma->vm_ops && vma->vm_ops->open) {
@@ -173,6 +181,7 @@
vma->vm_start = start;
vma->vm_end = end;
vma->vm_flags = newflags;
+ vma->vm_raend = 0;
vma->vm_page_prot = prot;
insert_vm_struct(current->mm, left);
insert_vm_struct(current->mm, right);
Index: mm/mremap.c
===================================================================
RCS file: /usr/cvsroot/linux-23/mm/mremap.c,v
retrieving revision 1.1.1.7
diff -u -r1.1.1.7 mremap.c
--- mm/mremap.c 1999/11/04 21:04:06 1.1.1.7
+++ mm/mremap.c 1999/11/16 19:12:16
@@ -138,6 +138,8 @@
new_vma->vm_end = new_addr+new_len;
new_vma->vm_pgoff = vma->vm_pgoff;
new_vma->vm_pgoff += (addr - vma->vm_start) >> PAGE_SHIFT;
+ new_vma->vm_rd_behavior = vma->vm_rd_behavior;
+ new_vma->vm_raend = 0;
if (new_vma->vm_file)
get_file(new_vma->vm_file);
if (new_vma->vm_ops && new_vma->vm_ops->open)

- Chuck Lever

--
corporate:	<chuckl@netscape.com>
personal:	<chucklever@netscape.net> or <cel@monkey.org>

The Linux Scalability project: http://www.citi.umich.edu/projects/linux-scalability/

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu. Please read the FAQ at http://www.tux.org/lkml/