[PATCH] madvise(2) and mincore(2) against 2.3.29

Chuck Lever (cel@monkey.org)
Wed, 1 Dec 1999 16:54:24 -0500 (EST)


hi linus-

Here are madvise(2) and mincore(2), against 2.3.29. The patch should apply
cleanly to 2.3.30-pre3. I think it's ready to be included in your kernel
distribution for much wider testing. I've left in the "__NR_madvise1"
definition to work around the existing glibc madvise() stub, which just
returns ENOTSUPP; it can be removed once glibc supports madvise properly.

diff -ruN linux-2.3.29-ref/arch/alpha/kernel/osf_sys.c linux/arch/alpha/kernel/osf_sys.c
--- linux-2.3.29-ref/arch/alpha/kernel/osf_sys.c Thu Jul 29 16:37:22 1999
+++ linux/arch/alpha/kernel/osf_sys.c Sun Nov 28 21:48:49 1999
@@ -204,15 +204,6 @@
return prio;
}

-
-/*
- * Heh. As documented by DEC..
- */
-asmlinkage unsigned long sys_madvise(void)
-{
- return 0;
-}
-
/*
* No need to acquire the kernel lock, we're local..
*/
diff -ruN linux-2.3.29-ref/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S
--- linux-2.3.29-ref/arch/i386/kernel/entry.S Fri Nov 26 19:53:31 1999
+++ linux/arch/i386/kernel/entry.S Sun Nov 28 21:49:44 1999
@@ -592,6 +592,8 @@
.long SYMBOL_NAME(sys_ni_syscall) /* streams2 */
.long SYMBOL_NAME(sys_vfork) /* 190 */
.long SYMBOL_NAME(sys_getrlimit)
+ .long SYMBOL_NAME(sys_madvise)
+ .long SYMBOL_NAME(sys_mincore)

/*
* NOTE!! This doesn't have to be exact - we just have
@@ -599,6 +601,6 @@
* entries. Don't panic if you notice that this hasn't
* been shrunk every time we add a new system call.
*/
- .rept NR_syscalls-191
+ .rept NR_syscalls-193
.long SYMBOL_NAME(sys_ni_syscall)
.endr
diff -ruN linux-2.3.29-ref/arch/m68k/kernel/entry.S linux/arch/m68k/kernel/entry.S
--- linux-2.3.29-ref/arch/m68k/kernel/entry.S Fri Nov 26 19:52:09 1999
+++ linux/arch/m68k/kernel/entry.S Sun Nov 28 21:50:32 1999
@@ -600,6 +600,8 @@
.long SYMBOL_NAME(sys_ni_syscall) /* streams1 */
.long SYMBOL_NAME(sys_ni_syscall) /* streams2 */
.long SYMBOL_NAME(sys_vfork) /* 190 */
+ .long SYMBOL_NAME(sys_madvise)
+ .long SYMBOL_NAME(sys_mincore)

.rept NR_syscalls-(.-SYMBOL_NAME(sys_call_table))/4
.long SYMBOL_NAME(sys_ni_syscall)
diff -ruN linux-2.3.29-ref/arch/mips/kernel/irix5sys.h linux/arch/mips/kernel/irix5sys.h
--- linux-2.3.29-ref/arch/mips/kernel/irix5sys.h Fri Jun 25 20:40:12 1999
+++ linux/arch/mips/kernel/irix5sys.h Sun Nov 28 21:52:41 1999
@@ -157,7 +157,7 @@
SYS(sys_munmap, 2) /* 1135 munmap() V*/
SYS(sys_mprotect, 3) /* 1136 mprotect() V*/
SYS(sys_msync, 4) /* 1137 msync() V*/
-SYS(irix_madvise, 3) /* 1138 madvise() DC*/
+SYS(sys_madvise, 3) /* 1138 madvise() V*/
SYS(irix_pagelock, 3) /* 1139 pagelock() IV*/
SYS(irix_getpagesize, 0) /* 1140 getpagesize() V*/
SYS(irix_quotactl, 0) /* 1141 quotactl() V*/
diff -ruN linux-2.3.29-ref/arch/mips/kernel/syscalls.h linux/arch/mips/kernel/syscalls.h
--- linux-2.3.29-ref/arch/mips/kernel/syscalls.h Wed Jul 28 13:30:10 1999
+++ linux/arch/mips/kernel/syscalls.h Sun Nov 28 21:53:52 1999
@@ -223,5 +223,7 @@
SYS(sys_capset, 2) /* 4205 */
SYS(sys_sigaltstack, 2)
SYS(sys_sendfile, 3)
-SYS(sys_ni_syscall, 0)
-SYS(sys_ni_syscall, 0)
+SYS(sys_ni_syscall, 0) /* streams1 */
+SYS(sys_ni_syscall, 0) /* streams2 */
+SYS(sys_madvise, 3) /* 4210 */
+SYS(sys_mincore, 3)
diff -ruN linux-2.3.29-ref/arch/mips/kernel/sysirix.c linux/arch/mips/kernel/sysirix.c
--- linux-2.3.29-ref/arch/mips/kernel/sysirix.c Fri Jun 25 20:40:12 1999
+++ linux/arch/mips/kernel/sysirix.c Sun Nov 28 21:54:24 1999
@@ -1136,15 +1136,6 @@
return retval;
}

-asmlinkage int irix_madvise(unsigned long addr, int len, int behavior)
-{
- lock_kernel();
- printk("[%s:%ld] Wheee.. irix_madvise(%08lx,%d,%d)\n",
- current->comm, current->pid, addr, len, behavior);
- unlock_kernel();
- return -EINVAL;
-}
-
asmlinkage int irix_pagelock(char *addr, int len, int op)
{
lock_kernel();
diff -ruN linux-2.3.29-ref/arch/ppc/kernel/misc.S linux/arch/ppc/kernel/misc.S
--- linux-2.3.29-ref/arch/ppc/kernel/misc.S Fri Nov 26 19:53:15 1999
+++ linux/arch/ppc/kernel/misc.S Sun Nov 28 21:55:30 1999
@@ -1039,4 +1039,6 @@
.long sys_ni_syscall /* streams1 */
.long sys_ni_syscall /* streams2 */
.long sys_vfork
- .space (NR_syscalls-183)*4
+ .long sys_madvise /* 190 */
+ .long sys_mincore
+ .space (NR_syscalls-191)*4
diff -ruN linux-2.3.29-ref/arch/sparc/mm/sun4c.c linux/arch/sparc/mm/sun4c.c
--- linux-2.3.29-ref/arch/sparc/mm/sun4c.c Fri Nov 26 19:51:41 1999
+++ linux/arch/sparc/mm/sun4c.c Sun Nov 28 21:57:36 1999
@@ -26,6 +26,7 @@
#include <asm/openprom.h>
#include <asm/mmu_context.h>
#include <asm/sun4paddr.h>
+#include <asm/mman.h>

/* TODO: Make it such that interrupt handlers cannot dick with
* the user segment lists, most of the cli/sti pairs can
@@ -1565,6 +1566,8 @@
sun4c_kstack_vma.vm_end = sun4c_taskstack_end;
sun4c_kstack_vma.vm_page_prot = PAGE_SHARED;
sun4c_kstack_vma.vm_flags = VM_READ | VM_WRITE | VM_EXEC;
+ sun4c_kstack_vma.vm_rd_behavior = MADV_NORMAL; /* MADV_DEFAULT is not defined by this patch; MADV_NORMAL is the default page-in behavior */
+ sun4c_kstack_vma.vm_raend = 0; /* no read-ahead has happened in this area yet */
insert_vm_struct(&init_mm, &sun4c_kstack_vma);
return start_mem;
}
diff -ruN linux-2.3.29-ref/fs/exec.c linux/fs/exec.c
--- linux-2.3.29-ref/fs/exec.c Fri Nov 26 19:54:29 1999
+++ linux/fs/exec.c Sun Nov 28 21:58:47 1999
@@ -37,6 +37,7 @@
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
+#include <asm/mman.h>

#ifdef CONFIG_KMOD
#include <linux/kmod.h>
@@ -295,6 +296,8 @@
mpnt->vm_ops = NULL;
mpnt->vm_pgoff = 0;
mpnt->vm_file = NULL;
+ mpnt->vm_rd_behavior = MADV_NORMAL;
+ mpnt->vm_raend = 0;
mpnt->vm_private_data = (void *) 0;
vmlist_modify_lock(current->mm);
insert_vm_struct(current->mm, mpnt);
diff -ruN linux-2.3.29-ref/include/asm-alpha/mman.h linux/include/asm-alpha/mman.h
--- linux-2.3.29-ref/include/asm-alpha/mman.h Sun Jan 25 19:31:47 1998
+++ linux/include/asm-alpha/mman.h Sun Nov 28 22:00:55 1999
@@ -31,6 +31,12 @@
#define MCL_CURRENT 8192 /* lock all currently mapped pages */
#define MCL_FUTURE 16384 /* lock all additions to address space */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
diff -ruN linux-2.3.29-ref/include/asm-alpha/unistd.h linux/include/asm-alpha/unistd.h
--- linux-2.3.29-ref/include/asm-alpha/unistd.h Fri Nov 26 19:51:57 1999
+++ linux/include/asm-alpha/unistd.h Sun Nov 28 22:02:00 1999
@@ -79,7 +79,7 @@
#define __NR_madvise 75
#define __NR_vhangup 76
#define __NR_osf_kmodcall 77 /* not implemented */
-#define __NR_osf_mincore 78 /* not implemented */
+#define __NR_mincore 78
#define __NR_getgroups 79
#define __NR_setgroups 80
#define __NR_osf_old_getpgrp 81 /* not implemented */
diff -ruN linux-2.3.29-ref/include/asm-arm/mman.h linux/include/asm-arm/mman.h
--- linux-2.3.29-ref/include/asm-arm/mman.h Tue Jan 20 19:39:42 1998
+++ linux/include/asm-arm/mman.h Sun Nov 28 22:03:22 1999
@@ -25,6 +25,12 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
diff -ruN linux-2.3.29-ref/include/asm-arm/unistd.h linux/include/asm-arm/unistd.h
--- linux-2.3.29-ref/include/asm-arm/unistd.h Fri Nov 26 19:53:21 1999
+++ linux/include/asm-arm/unistd.h Sun Nov 28 22:04:17 1999
@@ -198,6 +198,8 @@
/* 188 reserved */
/* 189 reserved */
#define __NR_vfork (__NR_SYSCALL_BASE+190)
+#define __NR_madvise (__NR_SYSCALL_BASE+191)
+#define __NR_mincore (__NR_SYSCALL_BASE+192)

#define __sys2(x) #x
#define __sys1(x) __sys2(x)
diff -ruN linux-2.3.29-ref/include/asm-i386/mman.h linux/include/asm-i386/mman.h
--- linux-2.3.29-ref/include/asm-i386/mman.h Mon Oct 7 01:55:48 1996
+++ linux/include/asm-i386/mman.h Sun Nov 28 22:05:24 1999
@@ -25,6 +25,12 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
diff -ruN linux-2.3.29-ref/include/asm-i386/unistd.h linux/include/asm-i386/unistd.h
--- linux-2.3.29-ref/include/asm-i386/unistd.h Fri Nov 26 19:53:37 1999
+++ linux/include/asm-i386/unistd.h Sun Nov 28 22:06:46 1999
@@ -196,6 +196,9 @@
#define __NR_putpmsg 189 /* some people actually want streams */
#define __NR_vfork 190
#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
+#define __NR_madvise 192
+#define __NR_madvise1 192 /* remove this, once glibc has real madvise */
+#define __NR_mincore 193

/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */

diff -ruN linux-2.3.29-ref/include/asm-m68k/mman.h linux/include/asm-m68k/mman.h
--- linux-2.3.29-ref/include/asm-m68k/mman.h Fri Nov 22 08:56:36 1996
+++ linux/include/asm-m68k/mman.h Sun Nov 28 22:07:56 1999
@@ -25,6 +25,12 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
diff -ruN linux-2.3.29-ref/include/asm-m68k/unistd.h linux/include/asm-m68k/unistd.h
--- linux-2.3.29-ref/include/asm-m68k/unistd.h Mon Aug 9 15:27:31 1999
+++ linux/include/asm-m68k/unistd.h Sun Nov 28 22:09:01 1999
@@ -194,6 +194,8 @@
#define __NR_getpmsg 188 /* some people actually want streams */
#define __NR_putpmsg 189 /* some people actually want streams */
#define __NR_vfork 190
+#define __NR_madvise 191
+#define __NR_mincore 192

/* user-visible error numbers are in the range -1 - -122: see
<asm-m68k/errno.h> */
diff -ruN linux-2.3.29-ref/include/asm-mips/mman.h linux/include/asm-mips/mman.h
--- linux-2.3.29-ref/include/asm-mips/mman.h Thu Jun 26 15:33:40 1997
+++ linux/include/asm-mips/mman.h Sun Nov 28 22:10:08 1999
@@ -56,6 +56,15 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */

+/*
+ * Flags for madvise
+ */
+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
diff -ruN linux-2.3.29-ref/include/asm-mips/unistd.h linux/include/asm-mips/unistd.h
--- linux-2.3.29-ref/include/asm-mips/unistd.h Wed Jul 28 13:30:10 1999
+++ linux/include/asm-mips/unistd.h Sun Nov 28 22:11:20 1999
@@ -1196,11 +1196,13 @@
#define __NR_sendfile (__NR_Linux + 207)
#define __NR_getpmsg (__NR_Linux + 208)
#define __NR_putpmsg (__NR_Linux + 209)
+#define __NR_madvise (__NR_Linux + 210)
+#define __NR_mincore (__NR_Linux + 211)

/*
* Offset of the last Linux flavoured syscall
*/
-#define __NR_Linux_syscalls 209
+#define __NR_Linux_syscalls 211

#ifndef _LANGUAGE_ASSEMBLY

diff -ruN linux-2.3.29-ref/include/asm-ppc/mman.h linux/include/asm-ppc/mman.h
--- linux-2.3.29-ref/include/asm-ppc/mman.h Wed Dec 18 03:54:09 1996
+++ linux/include/asm-ppc/mman.h Sun Nov 28 22:12:09 1999
@@ -25,6 +25,12 @@
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
diff -ruN linux-2.3.29-ref/include/asm-ppc/unistd.h linux/include/asm-ppc/unistd.h
--- linux-2.3.29-ref/include/asm-ppc/unistd.h Fri Nov 26 19:51:57 1999
+++ linux/include/asm-ppc/unistd.h Sun Nov 28 22:12:51 1999
@@ -194,6 +194,8 @@
#define __NR_getpmsg 187 /* some people actually want streams */
#define __NR_putpmsg 188 /* some people actually want streams */
#define __NR_vfork 189
+#define __NR_madvise 190
+#define __NR_mincore 191

#define __NR(n) #n

diff -ruN linux-2.3.29-ref/include/asm-sh/mman.h linux/include/asm-sh/mman.h
--- linux-2.3.29-ref/include/asm-sh/mman.h Fri Nov 26 19:51:57 1999
+++ linux/include/asm-sh/mman.h Sun Nov 28 22:13:36 1999
@@ -25,6 +25,12 @@
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* compatibility flags */
#define MAP_ANON MAP_ANONYMOUS
#define MAP_FILE 0
diff -ruN linux-2.3.29-ref/include/asm-sh/unistd.h linux/include/asm-sh/unistd.h
--- linux-2.3.29-ref/include/asm-sh/unistd.h Fri Nov 26 19:53:21 1999
+++ linux/include/asm-sh/unistd.h Sun Nov 28 22:14:07 1999
@@ -200,6 +200,8 @@
#define __NR_streams1 188 /* some people actually want it */
#define __NR_streams2 189 /* some people actually want it */
#define __NR_vfork 190
+#define __NR_madvise 191
+#define __NR_mincore 192

/* user-visible error numbers are in the range -1 - -125: see <asm-sh/errno.h> */

diff -ruN linux-2.3.29-ref/include/asm-sparc/mman.h linux/include/asm-sparc/mman.h
--- linux-2.3.29-ref/include/asm-sparc/mman.h Sat Nov 9 03:29:41 1996
+++ linux/include/asm-sparc/mman.h Sun Nov 28 22:15:00 1999
@@ -31,6 +31,12 @@
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* XXX Need to add flags to SunOS's mctl, mlockall, and madvise system
* XXX calls.
*/
diff -ruN linux-2.3.29-ref/include/asm-sparc/unistd.h linux/include/asm-sparc/unistd.h
--- linux-2.3.29-ref/include/asm-sparc/unistd.h Wed Jul 28 13:30:10 1999
+++ linux/include/asm-sparc/unistd.h Sun Nov 28 22:16:19 1999
@@ -90,10 +90,10 @@
/* #define __NR_vadvise 72 SunOS Specific */
#define __NR_munmap 73 /* Common */
#define __NR_mprotect 74 /* Common */
-/* #define __NR_madvise 75 SunOS Specific */
+#define __NR_madvise 75 /* Common */
#define __NR_vhangup 76 /* Common */
/* #define __NR_ni_syscall 77 ENOSYS under SunOS */
-/* #define __NR_mincore 78 SunOS Specific */
+#define __NR_mincore 78 /* Common */
#define __NR_getgroups 79 /* Common */
#define __NR_setgroups 80 /* Common */
#define __NR_getpgrp 81 /* Common */
diff -ruN linux-2.3.29-ref/include/asm-sparc64/mman.h linux/include/asm-sparc64/mman.h
--- linux-2.3.29-ref/include/asm-sparc64/mman.h Fri Dec 13 04:37:47 1996
+++ linux/include/asm-sparc64/mman.h Sun Nov 28 22:17:12 1999
@@ -31,6 +31,12 @@
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */

+#define MADV_NORMAL 0x0 /* default page-in behavior */
+#define MADV_RANDOM 0x1 /* page-in minimum required */
+#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */
+#define MADV_WILLNEED 0x3 /* pre-fault pages */
+#define MADV_DONTNEED 0x4 /* free these pages */
+
/* XXX Need to add flags to SunOS's mctl, mlockall, and madvise system
* XXX calls.
*/
diff -ruN linux-2.3.29-ref/include/asm-sparc64/unistd.h linux/include/asm-sparc64/unistd.h
--- linux-2.3.29-ref/include/asm-sparc64/unistd.h Wed Aug 4 18:39:46 1999
+++ linux/include/asm-sparc64/unistd.h Sun Nov 28 22:18:14 1999
@@ -90,10 +90,10 @@
/* #define __NR_vadvise 72 SunOS Specific */
#define __NR_munmap 73 /* Common */
#define __NR_mprotect 74 /* Common */
-/* #define __NR_madvise 75 SunOS Specific */
+#define __NR_madvise 75 /* Common */
#define __NR_vhangup 76 /* Common */
/* #define __NR_ni_syscall 77 ENOSYS under SunOS */
-/* #define __NR_mincore 78 SunOS Specific */
+#define __NR_mincore 78 /* Common */
#define __NR_getgroups 79 /* Common */
#define __NR_setgroups 80 /* Common */
#define __NR_getpgrp 81 /* Common */
diff -ruN linux-2.3.29-ref/include/linux/mm.h linux/include/linux/mm.h
--- linux-2.3.29-ref/include/linux/mm.h Fri Nov 26 20:02:53 1999
+++ linux/include/linux/mm.h Tue Nov 30 17:25:49 1999
@@ -59,6 +59,8 @@
struct vm_operations_struct * vm_ops;
unsigned long vm_pgoff; /* offset in PAGE_SIZE units, *not* PAGE_CACHE_SIZE */
struct file * vm_file;
+ unsigned vm_rd_behavior; /* madvise hint */
+ unsigned long vm_raend; /* read-ahead context */
void * vm_private_data; /* was vm_pte (shared mem) */
};

@@ -481,6 +483,11 @@
extern int shrink_mmap(int, int);
extern void truncate_inode_pages(struct inode *, unsigned long);
extern void put_cached_page(unsigned long);
+extern long madvise_dontneed(struct vm_area_struct *, unsigned long,
+ unsigned long);
+extern long madvise_willneed(struct vm_area_struct *, unsigned long,
+ unsigned long);
+

/*
* GFP bitmasks..
diff -ruN linux-2.3.29-ref/ipc/shm.c linux/ipc/shm.c
--- linux-2.3.29-ref/ipc/shm.c Fri Nov 26 19:54:12 1999
+++ linux/ipc/shm.c Sun Nov 28 22:20:37 1999
@@ -26,6 +26,7 @@

#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/mman.h>

#include "util.h"

@@ -663,6 +664,8 @@
| VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
| ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
shmd->vm_file = NULL;
+ shmd->vm_rd_behavior = MADV_NORMAL;
+ shmd->vm_raend = 0;
shmd->vm_pgoff = 0;
shmd->vm_ops = &shm_vm_ops;

diff -ruN linux-2.3.29-ref/kernel/fork.c linux/kernel/fork.c
--- linux-2.3.29-ref/kernel/fork.c Fri Nov 26 19:54:31 1999
+++ linux/kernel/fork.c Sun Nov 28 22:21:22 1999
@@ -22,6 +22,7 @@
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
+#include <asm/mman.h>

/* The idle threads do not count.. */
int nr_threads=0;
@@ -250,6 +251,8 @@
tmp->vm_mm = mm;
mm->map_count++;
tmp->vm_next = NULL;
+ tmp->vm_rd_behavior = MADV_NORMAL;
+ tmp->vm_raend = 0;
file = tmp->vm_file;
if (file) {
get_file(file);
diff -ruN linux-2.3.29-ref/mm/filemap.c linux/mm/filemap.c
--- linux-2.3.29-ref/mm/filemap.c Fri Nov 26 19:54:31 1999
+++ linux/mm/filemap.c Wed Dec 1 16:06:12 1999
@@ -20,11 +20,13 @@
#include <linux/file.h>
#include <linux/swapctl.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/mm.h>

#include <asm/pgalloc.h>
#include <asm/uaccess.h>
+#include <asm/mman.h>

#include <linux/highmem.h>

@@ -38,6 +40,8 @@
* page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
*
* SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
+ *
+ * madvise(2) and mincore(2), Chuck Lever <cel@monkey.org>
*/

atomic_t page_cache_size = ATOMIC_INIT(0);
@@ -54,6 +58,9 @@
#define CLUSTER_PAGES (1 << page_cluster)
#define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)

+#define filesize_in_pages(f) \
+ (((f)->f_dentry->d_inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) /* i_size of f's inode, rounded up to whole page-cache pages */
+
void __add_page_to_hash_queue(struct page * page, struct page **p)
{
atomic_inc(&page_cache_size);
@@ -552,24 +559,22 @@

/*
* Read in an entire cluster at once. A cluster is usually a 64k-
- * aligned block that includes the address requested in "offset."
+ * aligned block that includes the page requested in "offset."
*/
-static int read_cluster_nonblocking(struct file * file, unsigned long offset)
+static int read_cluster_nonblocking(struct file * file, unsigned long offset,
+ unsigned long filesize) /* file size in pages, now supplied by the caller instead of being recomputed here */
{
- int error = 0;
- unsigned long filesize = (file->f_dentry->d_inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
unsigned long pages = CLUSTER_PAGES;

offset = CLUSTER_OFFSET(offset);
while ((pages-- > 0) && (offset < filesize)) {
- error = page_cache_read(file, offset);
- if (error >= 0)
- offset ++;
- else
- break;
+ int error = page_cache_read(file, offset); /* start the read for one page of the cluster */
+ if (error < 0)
+ return error; /* give up on the rest of the cluster at the first failure */
+ offset ++;
}

- return error;
+ return 0; /* every page up to the cluster end or end of file was handed to page_cache_read */
}

/*
@@ -707,6 +712,493 @@
return page;
}

+/*
+ * The madvise(2) system call.
+ *
+ * Applications can use madvise() to advise the kernel how it should
+ * handle paging I/O in this VM area. The idea is to help the kernel
+ * use appropriate read-ahead and caching techniques. The information
+ * provided is advisory only, and can be safely disregarded by the
+ * kernel without affecting the correct operation of the application.
+ *
+ * behavior values:
+ * MADV_NORMAL - the default behavior is to read clusters. This
+ * results in some read-ahead and read-behind.
+ * MADV_RANDOM - the system should read the minimum amount of data
+ * on any access, since it is unlikely that the appli-
+ * cation will need more than what it asks for.
+ * MADV_SEQUENTIAL - pages in the given range will probably be accessed
+ * once, so they can be aggressively read ahead, and
+ * can be freed soon after they are accessed.
+ * MADV_WILLNEED - the application is notifying the system to read
+ * some pages ahead.
+ * MADV_DONTNEED - the application is finished with the given range,
+ * so the kernel can free resources associated with it.
+ *
+ * return values:
+ * zero = success
+ * -1 = some error occurred, errno value set (see below).
+ *
+ * errno values:
+ * EINVAL - start + len < 0, start is not page-aligned,
+ * "behavior" is not a valid value, or application
+ * is attempting to release locked or shared pages.
+ * ENOMEM - addresses in the specified range are not currently
+ * mapped, or are outside the AS of the process,
+ * or the kernel has exhausted its memory resources.
+ * EIO - an I/O error occurred while paging in data.
+ * EBADF - map exists, but area maps something that isn't a file.
+ */
+
+static long madvise_fixup_start(struct vm_area_struct * vma,
+ unsigned long end, int behavior)
+{
+ struct vm_area_struct * n; /* new area for [vma->vm_start, end), which receives the new behavior */
+
+ n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!n)
+ return -ENOMEM; /* could not allocate the extra area */
+ *n = *vma; /* begin with a copy of the original area */
+ n->vm_end = end;
+ n->vm_flags = vma->vm_flags; /* already equal after the struct copy above */
+ n->vm_rd_behavior = behavior;
+ n->vm_raend = 0; /* restart read-ahead tracking for the new area */
+ if (n->vm_file)
+ get_file(n->vm_file); /* the new area takes its own hold on the mapped file */
+ if (n->vm_ops && n->vm_ops->open)
+ n->vm_ops->open(n);
+ vmlist_modify_lock(vma->vm_mm);
+ vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT; /* original now starts at "end": advance its file offset by the pages given away */
+ vma->vm_start = end;
+ insert_vm_struct(current->mm, n);
+ vmlist_modify_unlock(vma->vm_mm);
+ return 0;
+}
+
+static long madvise_fixup_end(struct vm_area_struct * vma,
+ unsigned long start, int behavior)
+{
+ struct vm_area_struct * n; /* new area for [start, vma->vm_end), which receives the new behavior */
+
+ n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!n)
+ return -ENOMEM; /* could not allocate the extra area */
+ *n = *vma; /* begin with a copy of the original area */
+ n->vm_start = start;
+ n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT; /* file offset of the new area = original offset plus the pages left behind */
+ n->vm_flags = vma->vm_flags; /* already equal after the struct copy above */
+ n->vm_rd_behavior = behavior;
+ n->vm_raend = 0; /* restart read-ahead tracking for the new area */
+ if (n->vm_file)
+ get_file(n->vm_file); /* the new area takes its own hold on the mapped file */
+ if (n->vm_ops && n->vm_ops->open)
+ n->vm_ops->open(n);
+ vmlist_modify_lock(vma->vm_mm);
+ vma->vm_end = start; /* original keeps [vm_start, start) with its old behavior */
+ insert_vm_struct(current->mm, n);
+ vmlist_modify_unlock(vma->vm_mm);
+ return 0;
+}
+
+static long madvise_fixup_middle(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end, int behavior)
+{
+ struct vm_area_struct * left, * right; /* new areas for [vm_start, start) and [end, vm_end); both keep the old behavior */
+
+ left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!left)
+ return -ENOMEM;
+ right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!right) {
+ kmem_cache_free(vm_area_cachep, left); /* do not leak the first allocation */
+ return -ENOMEM;
+ }
+ *left = *vma;
+ *right = *vma;
+ left->vm_end = start;
+ right->vm_start = end;
+ right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT; /* left->vm_start is still the original vm_start here */
+ left->vm_flags = vma->vm_flags; /* already equal after the struct copy above */
+ right->vm_flags = vma->vm_flags; /* already equal after the struct copy above */
+ left->vm_rd_behavior = vma->vm_rd_behavior;
+ left->vm_raend = 0;
+ right->vm_rd_behavior = vma->vm_rd_behavior;
+ right->vm_raend = 0;
+ if (vma->vm_file)
+ atomic_add(2, &vma->vm_file->f_count); /* one extra file reference for each of the two new areas */
+
+ if (vma->vm_ops && vma->vm_ops->open) {
+ vma->vm_ops->open(left);
+ vma->vm_ops->open(right);
+ }
+ vmlist_modify_lock(vma->vm_mm);
+ vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT; /* original shrinks to [start, end): advance its file offset first, while vm_start is still the old value */
+ vma->vm_start = start;
+ vma->vm_end = end;
+ vma->vm_rd_behavior = behavior; /* only the middle piece gets the new behavior */
+ vma->vm_raend = 0;
+ insert_vm_struct(current->mm, left);
+ insert_vm_struct(current->mm, right);
+ vmlist_modify_unlock(vma->vm_mm);
+ return 0;
+}
+
+/*
+ * Record "behavior" for [start, end) of a file-backed vm area, splitting
+ * the area into up to three pieces so each piece has its own behavior.
+ */
+static long madvise_behavior(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end, int behavior)
+{
+ int error = 0;
+
+ if (behavior == vma->vm_rd_behavior)
+ return 0; /* nothing to change, so no split is needed */
+
+ /* Setting page-in behavior doesn't make sense for anonymous maps */
+ if (!vma->vm_file)
+ return -EBADF;
+
+ /* This caps the number of vma's this process can own */
+ if (vma->vm_mm->map_count > MAX_MAP_COUNT) /* NOTE(review): checked even when the whole area is covered and no new vma is created -- confirm intended */
+ return -ENOMEM;
+
+ if (start == vma->vm_start) {
+ if (end == vma->vm_end) {
+ vma->vm_rd_behavior = behavior; /* range is the whole area: update in place */
+ vma->vm_raend = 0;
+ } else
+ error = madvise_fixup_start(vma, end, behavior); /* range is a prefix of the area */
+ } else {
+ if (end == vma->vm_end)
+ error = madvise_fixup_end(vma, start, behavior); /* range is a suffix of the area */
+ else
+ error = madvise_fixup_middle(vma, start, end, behavior); /* range is strictly inside the area */
+ }
+
+ return error;
+}
+
+/*
+ * This schedules all required I/O operations, then runs the disk queue
+ * to make sure they are started. It does not wait for completion.
+ */
+long madvise_willneed(struct vm_area_struct * vma, unsigned long start,
+ unsigned long end)
+{
+ int error = 0;
+ unsigned long filesize;
+
+ /* Forcing page-ins doesn't make sense for anonymous maps */
+ if ((!vma->vm_file) || (!vma->vm_ops) || (!vma->vm_ops->nopage))
+ return -EBADF;
+
+ /* Convert start and end to page-size offsets into the file */
+ start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+
+ if (end > vma->vm_end)
+ end = vma->vm_end; /* never read beyond what this area maps */
+ end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+
+ /* round to cluster boundaries if this isn't a "random" area */
+ filesize = filesize_in_pages(vma->vm_file);
+ if (vma->vm_rd_behavior != MADV_RANDOM) {
+ start = CLUSTER_OFFSET(start); /* round start down to a cluster boundary */
+ end = CLUSTER_OFFSET(end + CLUSTER_PAGES - 1); /* round end up to a cluster boundary */
+
+ while ((start < end) && (start < filesize)) {
+ error = read_cluster_nonblocking(vma->vm_file, start,
+ filesize);
+ start += CLUSTER_PAGES;
+ if (error < 0)
+ break; /* stop scheduling reads; the error is returned below */
+ }
+ } else {
+ while ((start < end) && (start < filesize)) {
+ error = page_cache_read(vma->vm_file, start); /* MADV_RANDOM areas: one page at a time, no cluster rounding */
+ start++;
+ if (error < 0)
+ break;
+ }
+ }
+
+ run_task_queue(&tq_disk); /* runs even after an error, so reads already queued are started */
+ return error;
+}
+
+/*
+ * Application no longer needs these pages. If the pages are dirty,
+ * it's OK to just throw them away. The app will be more careful about
+ * data it wants to keep. Be sure to free swap resources too. The
+ * zap_page_range call sets things up for shrink_mmap to actually free
+ * these pages later if no one else has touched them in the meantime.
+ *
+ * NB: This interface discards data rather than pushes it out to swap,
+ * as some implementations do. This has performance implications for
+ * applications like large transactional databases which want to discard
+ * pages in anonymous maps after committing to backing store the data
+ * that was kept in them. There is no reason to write this data out to
+ * the swap area if the application is discarding it.
+ */
+long madvise_dontneed(struct vm_area_struct * vma, unsigned long start,
+ unsigned long end)
+{
+ if (vma->vm_flags & VM_LOCKED) /* NOTE(review): the madvise header comment also lists shared pages for EINVAL; only VM_LOCKED is tested here */
+ return -EINVAL;
+
+ lock_kernel();
+
+ flush_cache_range(vma->vm_mm, start, end);
+ zap_page_range(vma->vm_mm, start, end - start); /* third argument is a length, not an end address */
+ flush_tlb_range(vma->vm_mm, start, end);
+
+ unlock_kernel();
+ return 0;
+}
+
+static inline long madvise_area(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end, int behavior)
+{
+ int error; /* result of whichever handler the advice value selects */
+
+ switch (behavior) {
+ case MADV_NORMAL:
+ case MADV_SEQUENTIAL:
+ case MADV_RANDOM:
+ error = madvise_behavior(vma, start, end, behavior); /* these three only record a hint in the vma */
+ break;
+
+ case MADV_WILLNEED:
+ error = madvise_willneed(vma, start, end); /* schedules page-in I/O now */
+ break;
+
+ case MADV_DONTNEED:
+ error = madvise_dontneed(vma, start, end); /* discards the pages now */
+ break;
+
+ default:
+ error = -EINVAL; /* unknown advice value */
+ break;
+ }
+
+ return error;
+}
+
+asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior)
+{
+ unsigned long end;
+ struct vm_area_struct * vma;
+ int unmapped_error = 0; /* becomes -ENOMEM once a hole in [start,end) is seen */
+ int error = -EINVAL;
+
+ down(&current->mm->mmap_sem); /* held for the whole walk; released at "out" */
+
+ if (start & ~PAGE_MASK)
+ goto out; /* start must be page-aligned */
+ len = (len + ~PAGE_MASK) & PAGE_MASK; /* round len up to a whole number of pages */
+ end = start + len;
+ if (end < start)
+ goto out; /* start + len wrapped around */
+
+ error = 0;
+ if (end == start)
+ goto out; /* zero-length request succeeds trivially */
+
+ /*
+ * If the interval [start,end) covers some unmapped address
+ * ranges, just ignore them, but return -ENOMEM at the end.
+ */
+ vma = find_vma(current->mm, start);
+ for (;;) {
+ /* Still start < end. */
+ error = -ENOMEM;
+ if (!vma)
+ goto out; /* ran out of areas before reaching end */
+
+ /* Here start < vma->vm_end. */
+ if (start < vma->vm_start) {
+ unmapped_error = -ENOMEM;
+ start = vma->vm_start; /* skip the hole in front of this area */
+ }
+
+ /* Here vma->vm_start <= start < vma->vm_end. */
+ if (end <= vma->vm_end) {
+ if (start < end) {
+ error = madvise_area(vma, start, end,
+ behavior);
+ if (error)
+ goto out;
+ }
+ error = unmapped_error; /* last area handled: report any hole seen on the way */
+ goto out;
+ }
+
+ /* Here vma->vm_start <= start < vma->vm_end < end. */
+ error = madvise_area(vma, start, vma->vm_end, behavior);
+ if (error)
+ goto out;
+ start = vma->vm_end; /* NOTE(review): madvise_area may have split vma, so vm_end and vm_next are re-read after the call */
+ vma = vma->vm_next;
+ }
+
+out:
+ up(&current->mm->mmap_sem);
+ return error;
+}
+
+/*
+ * The mincore(2) system call.
+ *
+ * mincore() returns the memory residency status of the pages in the
+ * current process's address space specified by [addr, addr + len).
+ * The status is returned in a vector of bytes. The least significant
+ * bit of each byte is 1 if the referenced page is in memory, otherwise
+ * it is zero.
+ *
+ * Because the status of a page can change after mincore() checks it
+ * but before it returns to the application, the returned vector may
+ * contain stale information. Only locked pages are guaranteed to
+ * remain in memory.
+ *
+ * return values:
+ * zero = success
+ * -1 = some error occurred, errno value set (see below).
+ *
+ * errno values:
+ * EFAULT - vec points to an illegal address
+ * EINVAL - addr is not a multiple of PAGE_CACHE_SIZE,
+ * or addr + len wraps around the address space
+ * ENOMEM - Addresses in the range [addr, addr + len) are
+ * invalid for the address space of this process, or
+ * specify one or more pages which are not currently
+ * mapped
+ */
+
+/*
+ * This predicate returns 1 if the page is "in core," otherwise 0.
+ * Later we can get more picky about what "in core" means precisely,
+ * but for now, it simply checks to see if the page is in the page
+ * cache, and is up to date; i.e. that no page-in operation would be
+ * required at this time if an application were to map and access
+ * this page.
+ *
+ * We hold a reference on the page while testing it only to prevent an
+ * oops. The application already treats this information as a hint --
+ * it can become stale by the time the app actually gets it.
+ */
+static inline char mincore_page_is_present(struct vm_area_struct * vma,
+ unsigned long pgoff)
+{
+ int result = 0;
+ struct inode * inode;
+ struct page * page, ** hash;
+
+ /* Anonymous pages are always present */
+ if (!vma->vm_file)
+ return 1;
+
+ inode = vma->vm_file->f_dentry->d_inode;
+ hash = page_hash(&inode->i_data, pgoff);
+ page = __find_get_page(&inode->i_data, pgoff, hash);
+ if (page) {
+ if (Page_Uptodate(page))
+ result = 1;
+ /* __find_get_page() hands back a referenced, unlocked page: drop the ref, never UnlockPage() */
+ page_cache_release(page);
+ }
+
+ return result;
+}
+
+static long mincore_area(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end, char * vec)
+{
+ int error, size, i = 0; /* NOTE(review): size and i are int while the page count is unsigned long -- confirm very large areas cannot overflow */
+ char * tmp; /* kernel-side staging buffer, one byte per page */
+
+ start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; /* address -> page offset into the file */
+ if (end > vma->vm_end)
+ end = vma->vm_end;
+ end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+
+ /* # of bytes in "vec" = # of pages */
+ size = end - start;
+ tmp = (char *) vmalloc(size);
+ if (!tmp)
+ return -ENOMEM;
+
+ while (start < end)
+ tmp[i++] = mincore_page_is_present(vma, start++);
+
+ error = copy_to_user(vec, tmp, size) ? -EFAULT : 0; /* NOTE(review): sys_mincore calls this with mmap_sem held; confirm a page fault on vec cannot deadlock */
+ vfree(tmp);
+ return error;
+}
+
+asmlinkage long sys_mincore(unsigned long start, size_t len, char *vec)
+{
+ int index = 0; /* position in vec of the next page's status byte */
+ unsigned long end;
+ struct vm_area_struct * vma;
+ int unmapped_error = 0; /* becomes -ENOMEM once a hole in [start,end) is seen */
+ int error = -EINVAL;
+
+ down(&current->mm->mmap_sem); /* held for the whole walk; released at "out" */
+
+ if (start & ~PAGE_MASK)
+ goto out; /* start must be page-aligned */
+ len = (len + ~PAGE_MASK) & PAGE_MASK; /* round len up to a whole number of pages */
+ end = start + len;
+ if (end < start)
+ goto out; /* start + len wrapped around */
+
+ error = 0;
+ if (end == start)
+ goto out; /* zero-length request succeeds trivially */
+
+ /*
+ * If the interval [start,end) covers some unmapped address
+ * ranges, just ignore them, but return -ENOMEM at the end.
+ */
+ vma = find_vma(current->mm, start);
+ for (;;) {
+ /* Still start < end. */
+ error = -ENOMEM;
+ if (!vma)
+ goto out; /* ran out of areas before reaching end */
+
+ /* Here start < vma->vm_end. */
+ if (start < vma->vm_start) {
+ unmapped_error = -ENOMEM;
+ start = vma->vm_start; /* NOTE(review): index is not advanced for the skipped hole -- confirm vec layout is intended */
+ }
+
+ /* Here vma->vm_start <= start < vma->vm_end. */
+ if (end <= vma->vm_end) {
+ if (start < end) {
+ error = mincore_area(vma, start, end,
+ &vec[index]);
+ if (error)
+ goto out;
+ }
+ error = unmapped_error; /* last area handled: report any hole seen on the way */
+ goto out;
+ }
+
+ /* Here vma->vm_start <= start < vma->vm_end < end. */
+ error = mincore_area(vma, start, vma->vm_end, &vec[index]);
+ if (error)
+ goto out;
+ index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT; /* one vec byte was written per page of this area */
+ start = vma->vm_end;
+ vma = vma->vm_next;
+ }
+
+out:
+ up(&current->mm->mmap_sem);
+ return error;
+}
+
#if 0
#define PROFILE_READAHEAD
#define DEBUG_READAHEAD
@@ -1294,6 +1786,61 @@
}

/*
+ * Read-ahead and flush behind for MADV_SEQUENTIAL areas. Since we are
+ * sure this is sequential access, we don't need a flexible read-ahead
+ * window size -- we can always use a large fixed size window.
+ */
+static void nopage_sequential_readahead(struct vm_area_struct * vma,
+	unsigned long pgoff, unsigned long filesize)
+{
+	unsigned long ra_window;
+
+	ra_window = get_max_readahead(vma->vm_file->f_dentry->d_inode);
+	ra_window = CLUSTER_OFFSET(ra_window + CLUSTER_PAGES - 1);
+
+	/* vm_raend is zero if we haven't read ahead in this area yet. */
+	if (vma->vm_raend == 0)
+		vma->vm_raend = vma->vm_pgoff + ra_window;
+
+	/*
+	 * If we've just faulted the page half-way through our window,
+	 * then schedule reads for the next window, and release the
+	 * pages in the previous window.
+	 */
+	if ((pgoff + (ra_window >> 1)) == vma->vm_raend) {
+		/* File page offsets; vm_raend already includes vm_pgoff. */
+		unsigned long start = vma->vm_raend;
+		unsigned long end = start + ra_window;
+		unsigned long limit = vma->vm_pgoff +
+			((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
+
+		if (end > limit)
+			end = limit;
+		if (start > end)
+			return;
+		while ((start < end) && (start < filesize)) {
+			if (read_cluster_nonblocking(vma->vm_file,
+						start, filesize) < 0)
+				break;
+			start += CLUSTER_PAGES;
+		}
+		run_task_queue(&tq_disk);
+
+		/* if we're far enough past the beginning of this area,
+		   recycle pages that are in the previous window. */
+		if (vma->vm_raend > (vma->vm_pgoff + ra_window + ra_window)) {
+			unsigned long window = ra_window << PAGE_SHIFT;
+			/* file page offset -> address inside this vma */
+			end = vma->vm_start +
+				((vma->vm_raend - vma->vm_pgoff) << PAGE_SHIFT);
+			end -= window + window;
+			madvise_dontneed(vma, end - window, end);
+		}
+		vma->vm_raend += ra_window;
+	}
+}
+
+/*
* filemap_nopage() is invoked via the vma operations vector for a
* mapped memory region to read in file data during a page fault.
*
@@ -1309,7 +1856,7 @@
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
struct page *page, **hash, *old_page;
- unsigned long size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ unsigned long size = filesize_in_pages(file);

unsigned long pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;

@@ -1341,6 +1888,12 @@

success:
/*
+ * Try read-ahead for sequential areas.
+ */
+ if (area->vm_rd_behavior == MADV_SEQUENTIAL)
+ nopage_sequential_readahead(area, pgoff, size);
+
+ /*
* Found the page and have a reference on it, need to check sharing
* and possibly copy it over to another page..
*/
@@ -1368,8 +1921,8 @@
* Otherwise, we're off the end of a privately mapped file,
* so we need to map a zero page.
*/
- if (pgoff < size)
- error = read_cluster_nonblocking(file, pgoff);
+ if ((pgoff < size ) && (area->vm_rd_behavior != MADV_RANDOM))
+ error = read_cluster_nonblocking(file, pgoff, size);
else
error = page_cache_read(file, pgoff);

diff -ruN linux-2.3.29-ref/mm/mlock.c linux/mm/mlock.c
--- linux-2.3.29-ref/mm/mlock.c Fri Nov 26 19:53:37 1999
+++ linux/mm/mlock.c Sun Nov 28 22:56:56 1999
@@ -31,6 +31,8 @@
*n = *vma;
n->vm_end = end;
n->vm_flags = newflags;
+ n->vm_rd_behavior = vma->vm_rd_behavior;
+ n->vm_raend = 0;
if (n->vm_file)
get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
@@ -55,6 +57,8 @@
n->vm_start = start;
n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT;
n->vm_flags = newflags;
+ n->vm_rd_behavior = vma->vm_rd_behavior;
+ n->vm_raend = 0;
if (n->vm_file)
get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
@@ -85,6 +89,10 @@
right->vm_start = end;
right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
vma->vm_flags = newflags;
+ left->vm_rd_behavior = vma->vm_rd_behavior;
+ left->vm_raend = 0;
+ right->vm_rd_behavior = vma->vm_rd_behavior;
+ right->vm_raend = 0;
if (vma->vm_file)
atomic_add(2, &vma->vm_file->f_count);

@@ -96,6 +104,7 @@
vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
vma->vm_start = start;
vma->vm_end = end;
+ vma->vm_raend = 0; /* reset ra context for middle area */
vma->vm_flags = newflags;
insert_vm_struct(current->mm, left);
insert_vm_struct(current->mm, right);
diff -ruN linux-2.3.29-ref/mm/mmap.c linux/mm/mmap.c
--- linux-2.3.29-ref/mm/mmap.c Fri Nov 26 19:54:31 1999
+++ linux/mm/mmap.c Sun Nov 28 23:00:05 1999
@@ -15,6 +15,7 @@

#include <asm/uaccess.h>
#include <asm/pgalloc.h>
+#include <asm/mman.h>

/* description of effects of mapping type and prot in current implementation.
* this is due to the limited x86 page protection hardware. The expected
@@ -251,6 +252,8 @@
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;
+ vma->vm_rd_behavior = MADV_NORMAL;
+ vma->vm_raend = 0;

if (file) {
if (file->f_mode & 1)
@@ -549,6 +552,8 @@
mpnt->vm_end = area->vm_end;
mpnt->vm_page_prot = area->vm_page_prot;
mpnt->vm_flags = area->vm_flags;
+ mpnt->vm_rd_behavior = area->vm_rd_behavior;
+ mpnt->vm_raend = 0;
mpnt->vm_ops = area->vm_ops;
mpnt->vm_pgoff = area->vm_pgoff;
area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
@@ -788,6 +793,8 @@
vma->vm_ops = NULL;
vma->vm_pgoff = 0;
vma->vm_file = NULL;
+ vma->vm_rd_behavior = MADV_NORMAL; /* doesn't matter for anon map */
+ vma->vm_raend = 0;
vma->vm_private_data = NULL;

/*
@@ -951,6 +958,14 @@
if (off != mpnt->vm_pgoff)
continue;
}
+
+ /*
+ * If we have a file, then mmap page-in behavior
+ * must be the same..
+ */
+ if ((mpnt->vm_file == prev->vm_file) &&
+ (mpnt->vm_rd_behavior != prev->vm_rd_behavior))
+ continue;

/* merge prev with mpnt and set up pointers so the new
* big segment can possibly merge with the next one.
diff -ruN linux-2.3.29-ref/mm/mprotect.c linux/mm/mprotect.c
--- linux-2.3.29-ref/mm/mprotect.c Fri Nov 26 19:54:31 1999
+++ linux/mm/mprotect.c Sun Nov 28 23:02:34 1999
@@ -106,6 +106,8 @@
n->vm_end = end;
n->vm_flags = newflags;
n->vm_page_prot = prot;
+ n->vm_rd_behavior = vma->vm_rd_behavior;
+ n->vm_raend = 0;
if (n->vm_file)
get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
@@ -132,6 +134,8 @@
n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT;
n->vm_flags = newflags;
n->vm_page_prot = prot;
+ n->vm_rd_behavior = vma->vm_rd_behavior;
+ n->vm_raend = 0;
if (n->vm_file)
get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
@@ -162,6 +166,10 @@
left->vm_end = start;
right->vm_start = end;
right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
+ left->vm_rd_behavior = vma->vm_rd_behavior;
+ left->vm_raend = 0;
+ right->vm_rd_behavior = vma->vm_rd_behavior;
+ right->vm_raend = 0;
if (vma->vm_file)
atomic_add(2,&vma->vm_file->f_count);
if (vma->vm_ops && vma->vm_ops->open) {
@@ -173,6 +181,7 @@
vma->vm_start = start;
vma->vm_end = end;
vma->vm_flags = newflags;
+ vma->vm_raend = 0; /* reset ra context for middle area */
vma->vm_page_prot = prot;
insert_vm_struct(current->mm, left);
insert_vm_struct(current->mm, right);
diff -ruN linux-2.3.29-ref/mm/mremap.c linux/mm/mremap.c
--- linux-2.3.29-ref/mm/mremap.c Fri Nov 26 19:54:31 1999
+++ linux/mm/mremap.c Sun Nov 28 23:03:47 1999
@@ -138,6 +138,8 @@
new_vma->vm_end = new_addr+new_len;
new_vma->vm_pgoff = vma->vm_pgoff;
new_vma->vm_pgoff += (addr - vma->vm_start) >> PAGE_SHIFT;
+ new_vma->vm_rd_behavior = vma->vm_rd_behavior;
+ new_vma->vm_raend = 0;
if (new_vma->vm_file)
get_file(new_vma->vm_file);
if (new_vma->vm_ops && new_vma->vm_ops->open)

- Chuck Lever

--
corporate:	<chuckl@netscape.com>
personal:	<chucklever@netscape.net> or <cel@monkey.org>

The Linux Scalability project: http://www.citi.umich.edu/projects/linux-scalability/

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu. Please read the FAQ at http://www.tux.org/lkml/