Full support for more than 1024 fds.

Andi Kleen (andi@mlm.extern.lrz-muenchen.de)
Sun, 19 Jan 1997 07:58:42 +0100


Hi,

Here is a new version of the patch to add support for more than
1024 files per process. I have now added /proc/sys/kernel/nr-open,
so NR_OPEN can be tuned dynamically. Applications still have
to be recompiled (with an appropriate -D__USER_FDSETSIZE value).
All children created after a change to /proc/sys/kernel/nr-open will
have a bigger file table. Please test this patch before I submit it
to Linus. It is against 2.1.21. It would be especially nice if
someone with an Alpha could test it, to check that select() is
still 64-bit clean (it should be, but I can't test it).

Please send all bug reports to me. It works on my workstations,
but hasn't seen much testing. The patch also contains some other small
bits I haven't submitted yet.

-Andi

diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/arch/alpha/kernel/osf_sys.c linux-wrk/arch/alpha/kernel/osf_sys.c
--- /u2/unpack/linux-clean/arch/alpha/kernel/osf_sys.c Sat Nov 9 18:31:33 1996
+++ linux-wrk/arch/alpha/kernel/osf_sys.c Mon Jan 6 11:04:04 1997
@@ -100,8 +100,8 @@
struct file *file;
struct osf_dirent_callback buf;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
if (!file->f_op || !file->f_op->readdir)
return -ENOTDIR;
error = verify_area(VERIFY_WRITE, dirent, count);
@@ -181,8 +181,8 @@
if (flags & (MAP_HASSEMAPHORE | MAP_INHERIT | MAP_UNALIGNED))
printk("%s: unimplemented OSF mmap flags %04lx\n", current->comm, flags);
if (!(flags & MAP_ANONYMOUS)) {
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
}
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
return do_mmap(file, addr, len, prot, flags, off);
@@ -258,8 +258,8 @@
return retval;
if (bufsiz > sizeof(struct osf_statfs))
bufsiz = sizeof(struct osf_statfs);
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
if (!(inode = file->f_inode))
return -ENOENT;
if (!inode->i_sb->s_op->statfs)
@@ -501,7 +501,7 @@

asmlinkage unsigned long sys_getdtablesize(void)
{
- return NR_OPEN;
+ return global_nr_open; /* ?? is this right? */
}

asmlinkage int sys_pipe(int a0, int a1, int a2, int a3, int a4, int a5,
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/arch/i386/kernel/signal.c linux-wrk/arch/i386/kernel/signal.c
--- /u2/unpack/linux-clean/arch/i386/kernel/signal.c Sat Jan 18 22:06:32 1997
+++ linux-wrk/arch/i386/kernel/signal.c Mon Jan 6 11:06:48 1997
@@ -43,7 +43,7 @@
}
}

-static inline void restore_i387_hard(struct _fpstate *buf)
+static inline int restore_i387_hard(struct _fpstate *buf)
{
#ifdef __SMP__
if (current->flags & PF_USEDFPU) {
@@ -57,19 +57,19 @@
#endif
current->used_math = 1;
current->flags &= ~PF_USEDFPU;
- copy_from_user(&current->tss.i387.hard, buf, sizeof(*buf));
+ return copy_from_user(&current->tss.i387.hard, buf, sizeof(*buf));
}

-static void restore_i387(struct _fpstate *buf)
+static int restore_i387(struct _fpstate *buf)
{
#ifndef CONFIG_MATH_EMULATION
- restore_i387_hard(buf);
+ return restore_i387_hard(buf);
#else
if (hard_math) {
- restore_i387_hard(buf);
- return;
+ return restore_i387_hard(buf);
}
restore_i387_soft(buf);
+ return 0;
#endif
}

@@ -122,9 +122,8 @@
regs->orig_eax = -1; /* disable syscall checks */
if (context->fpstate) {
struct _fpstate * buf = context->fpstate;
- if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- restore_i387(buf);
+ if (restore_i387(buf))
+ goto badframe;
}
return context->eax;
badframe:
@@ -148,7 +147,10 @@
}
#endif
current->tss.i387.hard.status = current->tss.i387.hard.swd;
- copy_to_user(buf, &current->tss.i387.hard, sizeof(*buf));
+
+ /* We would use a __copy_to_user() here if it exists [hint, hint;] */
+ if (copy_to_user(buf, &current->tss.i387.hard, sizeof(*buf)))
+ buf = NULL;
current->used_math = 0;
return buf;
}
@@ -181,18 +183,19 @@
if ((regs->xss & 0xffff) != USER_DS && sa->sa_restorer)
frame = (unsigned long *) sa->sa_restorer;
frame -= 64;
- if (!access_ok(VERIFY_WRITE,frame,64*4))
+ if (verify_area(VERIFY_WRITE,frame,64*4))
do_exit(SIGSEGV);

/* set up the "normal" stack seen by the signal handler (iBCS2) */
#define __CODE ((unsigned long)(frame+24))
#define CODE(x) ((unsigned long *) ((x)+__CODE))

- /* XXX Can possible miss a SIGSEGV when frame crosses a page border
- and a thread unmaps it while we are accessing it.
- So either check all put_user() calls or don't do it at all.
- We use __put_user() here because the access_ok() call was already
- done earlier. */
+ /* We don't want to check all __put_user() so we only check
+ the first and the last (to catch the error when the stack frame
+ crosses a page border).
+
+ We use __put_user() here because the access_ok() call was already done
+ by verify_area. */
if (__put_user(__CODE,frame))
do_exit(SIGSEGV);
if (current->exec_domain && current->exec_domain->signal_invmap)
@@ -223,7 +226,6 @@
__put_user(regs->eflags, frame+18);
__put_user(regs->esp, frame+19);
__put_user(regs->xss, frame+20);
- __put_user((unsigned long) save_i387((struct _fpstate *)(frame+32)),frame+21);
/* non-iBCS2 extensions.. */
__put_user(oldmask, frame+22);
__put_user(current->tss.cr2, frame+23);
@@ -231,6 +233,13 @@
__put_user(0x0000b858, CODE(0)); /* popl %eax ; movl $,%eax */
__put_user(0x80cd0000, CODE(4)); /* int $0x80 */
__put_user(__NR_sigreturn, CODE(2));
+ {
+ struct _fpstate *buf;
+
+ buf = save_i387((struct _fpstate *)(frame+32));
+ if (!buf || __put_user((unsigned long)buf, frame+21))
+ do_exit(SIGSEGV);
+ }
#undef __CODE
#undef CODE

diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/arch/m68k/kernel/sys_m68k.c linux-wrk/arch/m68k/kernel/sys_m68k.c
--- /u2/unpack/linux-clean/arch/m68k/kernel/sys_m68k.c Sun Jan 5 22:32:22 1997
+++ linux-wrk/arch/m68k/kernel/sys_m68k.c Mon Jan 6 11:04:14 1997
@@ -69,8 +69,8 @@
return error;
copy_from_user(&a, arg, sizeof(a));
if (!(a.flags & MAP_ANONYMOUS)) {
- if (a.fd >= NR_OPEN || !(file = current->files->fd[a.fd]))
- return -EBADF;
+ if (!(file = file_from-fd(a.fd)))
+ return -EBADF;
}
a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
error = do_mmap(file, a.addr, a.len, a.prot, a.flags, a.offset);
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/arch/mips/kernel/syscall.c linux-wrk/arch/mips/kernel/syscall.c
--- /u2/unpack/linux-clean/arch/mips/kernel/syscall.c Fri Apr 12 08:49:30 1996
+++ linux-wrk/arch/mips/kernel/syscall.c Mon Jan 6 11:04:14 1997
@@ -42,8 +42,8 @@
struct file * file = NULL;

if (flags & MAP_RENAME) {
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
}
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);

diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/arch/ppc/kernel/syscalls.c linux-wrk/arch/ppc/kernel/syscalls.c
--- /u2/unpack/linux-clean/arch/ppc/kernel/syscalls.c Sun Jan 5 22:31:59 1997
+++ linux-wrk/arch/ppc/kernel/syscalls.c Mon Jan 6 11:04:14 1997
@@ -41,8 +41,8 @@
struct file * file = NULL;

if (!(flags & MAP_ANONYMOUS)) {
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
}
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);

@@ -69,8 +69,8 @@
if (!(flags & MAP_ANONYMOUS)) {
unsigned long fd;
get_user(fd,buffer+4);
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
}
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
if ( get_user(a,buffer) || get_user(b,buffer+1) ||
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/arch/sparc/kernel/sunos_ioctl.c linux-wrk/arch/sparc/kernel/sunos_ioctl.c
--- /u2/unpack/linux-clean/arch/sparc/kernel/sunos_ioctl.c Sun Jan 5 22:30:42 1997
+++ linux-wrk/arch/sparc/kernel/sunos_ioctl.c Mon Jan 6 11:04:14 1997
@@ -34,8 +34,8 @@
struct file *filp;
int ret;

- if (fd >= NR_OPEN || !(filp = current->files->fd [fd]))
- return -EBADF;
+ if (!(filp = file_from_fd(fd)))
+ return -EBADF;

/* First handle an easy compat. case for tty ldisc. */
if(cmd == TIOCSETD) {
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/arch/sparc/kernel/sys_sparc.c linux-wrk/arch/sparc/kernel/sys_sparc.c
--- /u2/unpack/linux-clean/arch/sparc/kernel/sys_sparc.c Sun Jan 5 22:32:55 1997
+++ linux-wrk/arch/sparc/kernel/sys_sparc.c Mon Jan 6 11:04:14 1997
@@ -154,9 +154,8 @@
long retval;

if (!(flags & MAP_ANONYMOUS)) {
- if (fd >= NR_OPEN || !(file = current->files->fd[fd])){
- return -EBADF;
- }
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
}
if(!(flags & MAP_FIXED) && !addr) {
addr = get_unmapped_area(addr, len);
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/arch/sparc/kernel/sys_sunos.c linux-wrk/arch/sparc/kernel/sys_sunos.c
--- /u2/unpack/linux-clean/arch/sparc/kernel/sys_sunos.c Sun Jan 19 07:32:48 1997
+++ linux-wrk/arch/sparc/kernel/sys_sunos.c Sun Jan 19 06:48:12 1997
@@ -86,8 +86,8 @@
flags &= ~MAP_NORESERVE;
}
if(!(flags & MAP_ANONYMOUS))
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
if(!(flags & MAP_FIXED) && !addr) {
addr = get_unmapped_area(addr, len);
if(!addr)
@@ -328,7 +328,7 @@
*/
asmlinkage long sunos_getdtablesize(void)
{
- return NR_OPEN;
+ return global_nr_open; /* ?? is this right */
}
#define _S(nr) (1<<((nr)-1))

@@ -412,8 +412,8 @@
struct sunos_dirent_callback buf;
int error;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
if (!file->f_op || !file->f_op->readdir)
return -ENOTDIR;
if(cnt < (sizeof(struct sunos_dirent) + 255))
@@ -479,8 +479,8 @@
struct sunos_direntry_callback buf;
int error;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
if (!file->f_op || !file->f_op->readdir)
return -ENOTDIR;
if(cnt < (sizeof(struct sunos_direntry) + 255))
@@ -915,7 +915,7 @@
select_table wait_table, *wait;
struct select_table_entry *entry;

- if (nfds > NR_OPEN)
+ if (nfds > current->files->nr_open)
return -EINVAL;

if (!(entry = (struct select_table_entry*)__get_free_page(GFP_KERNEL))
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/drivers/block/loop.c linux-wrk/drivers/block/loop.c
--- /u2/unpack/linux-clean/drivers/block/loop.c Sat Jan 18 22:06:32 1997
+++ linux-wrk/drivers/block/loop.c Mon Jan 6 11:04:14 1997
@@ -278,7 +278,7 @@
struct file *file;
struct inode *inode;

- if (arg >= NR_OPEN || !(file = current->files->fd[arg]))
+ if (!(file = file_from_fd(arg)))
return -EBADF;
if (lo->lo_inode)
return -EBUSY;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/drivers/char/tty_io.c linux-wrk/drivers/char/tty_io.c
--- /u2/unpack/linux-clean/drivers/char/tty_io.c Sat Jan 18 22:06:33 1997
+++ linux-wrk/drivers/char/tty_io.c Mon Jan 6 11:04:14 1997
@@ -1713,7 +1713,7 @@
((session > 0) && ((*p)->session == session)))
send_sig(SIGKILL, *p, 1);
else if ((*p)->files) {
- for (i=0; i < NR_OPEN; i++) {
+ for (i=0; i < (*p)->files->nr_open; i++) {
filp = (*p)->files->fd[i];
if (filp && (filp->f_op == &tty_fops) &&
(filp->private_data == tty)) {
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/drivers/scsi/scsi_ioctl.c linux-wrk/drivers/scsi/scsi_ioctl.c
--- /u2/unpack/linux-clean/drivers/scsi/scsi_ioctl.c Sun Jan 5 22:31:39 1997
+++ linux-wrk/drivers/scsi/scsi_ioctl.c Mon Jan 6 11:04:14 1997
@@ -36,15 +36,13 @@

static int ioctl_probe(struct Scsi_Host * host, void *buffer)
{
- int temp, result;
+ int temp;
unsigned int len,slen;
const char * string;

if ((temp = host->hostt->present) && buffer) {
- result = verify_area(VERIFY_READ, buffer, sizeof(long));
- if (result) return result;
-
- get_user(len, (unsigned int *) buffer);
+ if (get_user(len, (unsigned int *) buffer))
+ return -EFAULT;
if(host->hostt->info)
string = host->hostt->info(host);
else
@@ -53,10 +51,8 @@
slen = strlen(string);
if (len > slen)
len = slen + 1;
- result = verify_area(VERIFY_WRITE, buffer, len);
- if (result) return result;
-
- copy_to_user (buffer, string, len);
+ if (copy_to_user (buffer, string, len))
+ return -EFAULT;
}
}
return temp;
@@ -175,13 +171,6 @@

if (!sic)
return -EINVAL;
-
-
- /*
- * Verify that we can read at least this much.
- */
- result = verify_area(VERIFY_READ, sic, sizeof (Scsi_Ioctl_Command));
- if (result) return result;

/*
* The structure that we are passed should look like:
@@ -193,8 +182,9 @@
* unsigned char data[];
* };
*/
- get_user(inlen, &sic->inlen);
- get_user(outlen, &sic->outlen);
+ if (get_user(inlen, &sic->inlen) ||
+ get_user(outlen, &sic->outlen))
+ return -EFAULT;

/*
* We do not transfer more than MAX_BUF with this interface.
@@ -205,7 +195,8 @@
if( outlen > MAX_BUF ) outlen = MAX_BUF;

cmd_in = sic->data;
- get_user(opcode, cmd_in);
+ if (get_user(opcode, cmd_in))
+ return -EFAULT;

needed = buf_needed = (inlen > outlen ? inlen : outlen);
if(buf_needed){
@@ -222,18 +213,15 @@
*/
cmdlen = COMMAND_SIZE(opcode);

- result = verify_area(VERIFY_READ, cmd_in,
- cmdlen + inlen > MAX_BUF ? MAX_BUF : inlen);
- if (result) return result;
-
- copy_from_user ((void *) cmd, cmd_in, cmdlen);
+ if (copy_from_user ((void *) cmd, cmd_in, cmdlen))
+ return -EFAULT;

/*
* Obtain the data to be sent to the device (if any).
*/
- copy_from_user ((void *) buf,
+ if (copy_from_user ((void *) buf,
(void *) (cmd_in + cmdlen),
- inlen);
+ inlen)) return -EFAULT;

/*
* Set the lun field to the correct value.
@@ -272,17 +260,13 @@
* If there was an error condition, pass the info back to the user.
*/
if(SCpnt->result) {
- result = verify_area(VERIFY_WRITE,
- cmd_in,
- sizeof(SCpnt->sense_buffer));
- if (result) return result;
- copy_to_user((void *) cmd_in,
+ if (copy_to_user((void *) cmd_in,
SCpnt->sense_buffer,
- sizeof(SCpnt->sense_buffer));
+ sizeof(SCpnt->sense_buffer)))
+ return -EFAULT;
} else {
- result = verify_area(VERIFY_WRITE, cmd_in, outlen);
- if (result) return result;
- copy_to_user ((void *) cmd_in, buf, outlen);
+ if (copy_to_user ((void *) cmd_in, buf, outlen))
+ return -EFAULT;
}
result = SCpnt->result;

@@ -320,7 +304,6 @@
*/
int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg)
{
- int result;
char scsi_cmd[12];

/* No idea how this happens.... */
@@ -328,16 +311,13 @@

switch (cmd) {
case SCSI_IOCTL_GET_IDLUN:
- result = verify_area(VERIFY_WRITE, arg, sizeof (Scsi_Idlun));
- if (result) return result;
-
- put_user(dev->id
+ if (put_user(dev->id
+ (dev->lun << 8)
+ (dev->channel << 16)
+ ((dev->host->hostt->proc_dir->low_ino & 0xff) << 24),
- &((Scsi_Idlun *) arg)->dev_id);
- put_user(dev->host->unique_id, &((Scsi_Idlun *) arg)->host_unique_id);
- return 0;
+ &((Scsi_Idlun *) arg)->dev_id))
+ return -EFAULT;
+ return put_user(dev->host->unique_id, &((Scsi_Idlun *) arg)->host_unique_id);
case SCSI_IOCTL_TAGGED_ENABLE:
if(!suser()) return -EACCES;
if(!dev->tagged_supported) return -EINVAL;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/drivers/scsi/st_options.h linux-wrk/drivers/scsi/st_options.h
--- /u2/unpack/linux-clean/drivers/scsi/st_options.h Sun Jan 19 07:32:49 1997
+++ linux-wrk/drivers/scsi/st_options.h Sun Jan 19 06:48:13 1997
@@ -92,4 +92,6 @@
The default is BSD semantics. */
#define ST_SYSV 0

+#define ST_SYSV 0
+
#endif
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/buffer.c linux-wrk/fs/buffer.c
--- /u2/unpack/linux-clean/fs/buffer.c Sat Jan 18 22:06:36 1997
+++ linux-wrk/fs/buffer.c Mon Jan 6 11:04:14 1997
@@ -1,3 +1,4 @@
+
/*
* linux/fs/buffer.c
*
@@ -263,7 +264,7 @@
struct file * file;
struct inode * inode;

- if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
+ if (!(file = file_from_fd(fd)) || !(inode=file->f_inode))
return -EBADF;
if (!file->f_op || !file->f_op->fsync)
return -EINVAL;
@@ -277,7 +278,7 @@
struct file * file;
struct inode * inode;

- if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
+ if (!(file = file_from_fd(fd)) || !(inode=file->f_inode))
return -EBADF;
if (!file->f_op || !file->f_op->fsync)
return -EINVAL;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/exec.c linux-wrk/fs/exec.c
--- /u2/unpack/linux-clean/fs/exec.c Sat Jan 18 22:06:36 1997
+++ linux-wrk/fs/exec.c Mon Jan 6 11:04:16 1997
@@ -420,10 +420,10 @@
unsigned long set, i;

i = j * __NFDBITS;
- if (i >= NR_OPEN)
+ if (i >= files->nr_open)
break;
- set = files->close_on_exec.fds_bits[j];
- files->close_on_exec.fds_bits[j] = 0;
+ set = files->close_on_exec->fds_bits[j];
+ files->close_on_exec->fds_bits[j] = 0;
j++;
for ( ; set ; i++,set >>= 1) {
if (set & 1)
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/ext2/symlink.c linux-wrk/fs/ext2/symlink.c
--- /u2/unpack/linux-clean/fs/ext2/symlink.c Sat Jan 18 22:06:36 1997
+++ linux-wrk/fs/ext2/symlink.c Thu Jan 16 14:08:37 1997
@@ -103,7 +103,7 @@
static int ext2_readlink (struct inode * inode, char * buffer, int buflen)
{
struct buffer_head * bh = NULL;
- char * link;
+ char * link, *p;
int i, err;

if (!S_ISLNK(inode->i_mode)) {
@@ -123,8 +123,8 @@
else
link = (char *) inode->u.ext2_i.i_data;

- /* XXX I hope link is always '\0'-terminated. */
- i = strlen(link)+1;
+ p = memchr(link,'\0',buflen);
+ i = p ? (p - link) : buflen;
if (i > buflen)
i = buflen;
if (copy_to_user(buffer, link, i))
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/fcntl.c linux-wrk/fs/fcntl.c
--- /u2/unpack/linux-clean/fs/fcntl.c Sun Jan 5 22:32:31 1997
+++ linux-wrk/fs/fcntl.c Mon Jan 6 11:04:21 1997
@@ -19,27 +19,28 @@
static inline int dupfd(unsigned int fd, unsigned int arg)
{
struct files_struct * files = current->files;
+ struct file *oldfile;

- if (fd >= NR_OPEN || !files->fd[fd])
- return -EBADF;
- if (arg >= NR_OPEN)
+ if (!(oldfile = file_from_fd(fd)))
+ return -EBADF;
+ if (arg >= files->nr_open)
return -EINVAL;
- arg = find_next_zero_bit(&files->open_fds, NR_OPEN, arg);
+ arg = find_next_zero_bit(files->open_fds, files->nr_open, arg);
if (arg >= current->rlim[RLIMIT_NOFILE].rlim_cur)
return -EMFILE;
- FD_SET(arg, &files->open_fds);
- FD_CLR(arg, &files->close_on_exec);
- (files->fd[arg] = files->fd[fd])->f_count++;
+ FD_SET(arg, files->open_fds);
+ FD_CLR(arg, files->close_on_exec);
+ (files->fd[arg] = oldfile)->f_count++;
return arg;
}

asmlinkage int sys_dup2(unsigned int oldfd, unsigned int newfd)
{
- if (oldfd >= NR_OPEN || !current->files->fd[oldfd])
- return -EBADF;
+ if (!file_from_fd(oldfd))
+ return -EBADF;
if (newfd == oldfd)
return newfd;
- if (newfd >= NR_OPEN)
+ if (newfd >= current->files->nr_open)
return -EBADF; /* following POSIX.1 6.2.1 */

sys_close(newfd);
@@ -57,18 +58,18 @@
struct task_struct *p;
int task_found = 0;

- if (fd >= NR_OPEN || !(filp = current->files->fd[fd]))
- return -EBADF;
+ if (!(filp = file_from_fd(fd)))
+ return -EBADF;
switch (cmd) {
case F_DUPFD:
return dupfd(fd,arg);
case F_GETFD:
- return FD_ISSET(fd, &current->files->close_on_exec);
+ return FD_ISSET(fd, current->files->close_on_exec);
case F_SETFD:
if (arg&1)
- FD_SET(fd, &current->files->close_on_exec);
+ FD_SET(fd, current->files->close_on_exec);
else
- FD_CLR(fd, &current->files->close_on_exec);
+ FD_CLR(fd, current->files->close_on_exec);
return 0;
case F_GETFL:
return filp->f_flags;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/ioctl.c linux-wrk/fs/ioctl.c
--- /u2/unpack/linux-clean/fs/ioctl.c Sun Jan 5 22:32:31 1997
+++ linux-wrk/fs/ioctl.c Mon Jan 6 11:04:21 1997
@@ -50,15 +50,15 @@
unsigned int flag;
int on, error;

- if (fd >= NR_OPEN || !(filp = current->files->fd[fd]))
- return -EBADF;
+ if (!(filp = file_from_fd(fd)))
+ return -EBADF;
switch (cmd) {
case FIOCLEX:
- FD_SET(fd, &current->files->close_on_exec);
+ FD_SET(fd, current->files->close_on_exec);
return 0;

case FIONCLEX:
- FD_CLR(fd, &current->files->close_on_exec);
+ FD_CLR(fd, current->files->close_on_exec);
return 0;

case FIONBIO:
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/locks.c linux-wrk/fs/locks.c
--- /u2/unpack/linux-clean/fs/locks.c Sat Jan 18 22:06:36 1997
+++ linux-wrk/fs/locks.c Mon Jan 6 11:04:22 1997
@@ -241,8 +241,8 @@
struct file_lock file_lock;
struct file *filp;

- if ((fd >= NR_OPEN) || !(filp = current->files->fd[fd]))
- return (-EBADF);
+ if (!(filp = file_from_fd(fd)))
+ return -EBADF;

if (!flock_make_lock(filp, &file_lock, cmd))
return (-EINVAL);
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/ncpfs/inode.c linux-wrk/fs/ncpfs/inode.c
--- /u2/unpack/linux-clean/fs/ncpfs/inode.c Sun Jan 19 07:32:50 1997
+++ linux-wrk/fs/ncpfs/inode.c Sun Jan 19 06:48:13 1997
@@ -186,27 +186,28 @@
return NULL;
}

- if ( (data->ncp_fd >= NR_OPEN)
- || ((ncp_filp = current->files->fd[data->ncp_fd]) == NULL)
- || (!S_ISSOCK(ncp_filp->f_inode->i_mode)))
+ /* XXX shouldn't
+ if (!(ncp_filp = file_from_fd(data->ncp_fd)) ||
+ !(ncp->filp->f_inode) ||
+ (!S_ISSOCK(ncp_filp->f_inode->i_mode)))
{
printk("ncp_read_super: invalid ncp socket\n");
sb->s_dev = 0;
return NULL;
}

- if ( (data->wdog_fd >= NR_OPEN)
- || ((wdog_filp = current->files->fd[data->wdog_fd]) == NULL)
- || (!S_ISSOCK(wdog_filp->f_inode->i_mode)))
+ if (!(wdog_filp = file_from_fd(data->wdog_fd)) ||
+ !(wdog_filp->f_inode) ||
+ (!S_ISSOCK(wdog_filp->f_inode->i_mode)))
{
printk("ncp_read_super: invalid wdog socket\n");
sb->s_dev = 0;
return NULL;
}

- if ( (data->message_fd >= NR_OPEN)
- || ((msg_filp = current->files->fd[data->message_fd]) == NULL)
- || (!S_ISSOCK(msg_filp->f_inode->i_mode)))
+ if (!(msg_filp = file_from_fd(data->message_fd)) ||
+ !(msg_filp->f_inode) ||
+ (!S_ISSOCK(msg_filp->f_inode->i_mode)))
{
printk("ncp_read_super: invalid wdog socket\n");
sb->s_dev = 0;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/nfs/inode.c linux-wrk/fs/nfs/inode.c
--- /u2/unpack/linux-clean/fs/nfs/inode.c Sun Jan 19 07:32:50 1997
+++ linux-wrk/fs/nfs/inode.c Sun Jan 19 06:48:14 1997
@@ -122,7 +122,7 @@
printk("nfs warning: mount version %s than kernel\n",
data->version < NFS_MOUNT_VERSION ? "older" : "newer");
}
- if (fd >= NR_OPEN || !(filp = current->files->fd[fd])) {
+ if (!(filp = file_from_fd(fd))) {
printk("nfs_read_super: invalid file descriptor\n");
sb->s_dev = 0;
MOD_DEC_USE_COUNT;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/open.c linux-wrk/fs/open.c
--- /u2/unpack/linux-clean/fs/open.c Sat Jan 18 22:06:36 1997
+++ linux-wrk/fs/open.c Mon Jan 6 12:24:18 1997
@@ -18,6 +18,7 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/file.h>
+#include <linux/sysctl.h>

#include <asm/uaccess.h>
#include <asm/bitops.h>
@@ -51,8 +52,8 @@
error = verify_area(VERIFY_WRITE, buf, sizeof(struct statfs));
if (error)
return error;
- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
if (!(inode = file->f_inode))
return -ENOENT;
if (!inode->i_sb)
@@ -131,8 +132,8 @@
struct file * file;
int error;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
if (!(inode = file->f_inode))
return -ENOENT;
if (S_ISDIR(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
@@ -290,8 +291,8 @@
struct file * file;
int error;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
if (!(inode = file->f_inode))
return -ENOENT;
if (!S_ISDIR(inode->i_mode))
@@ -331,8 +332,8 @@
struct file * file;
struct iattr newattrs;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
if (!(inode = file->f_inode))
return -ENOENT;
if (IS_RDONLY(inode))
@@ -381,8 +382,8 @@
struct iattr newattrs;
int error;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
- return -EBADF;
+ if (!(file = file_from_fd(fd)))
+ return -EBADF;
if (!(inode = file->f_inode))
return -ENOENT;
if (IS_RDONLY(inode))
@@ -555,10 +556,10 @@
int fd;
struct files_struct * files = current->files;

- fd = find_first_zero_bit(&files->open_fds, NR_OPEN);
+ fd = find_first_zero_bit(files->open_fds, files->nr_open);
if (fd < current->rlim[RLIMIT_NOFILE].rlim_cur) {
- FD_SET(fd, &files->open_fds);
- FD_CLR(fd, &files->close_on_exec);
+ FD_SET(fd, files->open_fds);
+ FD_CLR(fd, files->close_on_exec);
return fd;
}
return -EMFILE;
@@ -566,7 +567,7 @@

inline void put_unused_fd(int fd)
{
- FD_CLR(fd, &current->files->open_fds);
+ FD_CLR(fd, current->files->open_fds);
}

asmlinkage int sys_open(const char * filename,int flags,int mode)
@@ -611,18 +612,27 @@
iput(inode);
}

+#define DEBUG_CLOSE_FP
+
int close_fp(struct file *filp)
{
struct inode *inode;
-
+#ifdef DEBUG_CLOSE_FP
+ if (!filp) {
+ printk(KERN_DEBUG "close_fp: filp == NULL. called from %s\n",
+ __builtin_return_address(0));
+ return 0;
+ }
+#endif
if (filp->f_count == 0) {
printk("VFS: Close: file count is 0\n");
return 0;
}
inode = filp->f_inode;
- if (inode)
+ if (inode) {
locks_remove_locks(current, filp);
- fput(filp, inode);
+ fput(filp, inode);
+ }
return 0;
}

@@ -634,9 +644,9 @@

files = current->files;
error = -EBADF;
- if (fd < NR_OPEN && (filp = files->fd[fd]) != NULL) {
+ if ((filp = file_from_fd(fd)) != NULL) {
put_unused_fd(fd);
- FD_CLR(fd, &files->close_on_exec);
+ FD_CLR(fd, files->close_on_exec);
files->fd[fd] = NULL;
error = close_fp(filp);
}
@@ -655,4 +665,26 @@
if (current->tty)
tty_vhangup(current->tty);
return 0;
+}
+
+int global_nr_open = NR_OPEN;
+
+#define NR_OPEN_MIN 256
+/* #define NR_OPEN_MAX (PAGE_SIZE/6*8) */
+
+int sysctl_proc_nropen(ctl_table *ctl, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ int ret;
+ ret = proc_dointvec(ctl, write, filp, buffer, lenp);
+ if (write) {
+ if (global_nr_open < NR_OPEN_MIN)
+ global_nr_open = NR_OPEN_MIN;
+#if 0
+ if (global_nr_open > NR_OPEN_MAX)
+ global_nr_open = NR_OPEN_MAX;
+#endif
+ global_nr_open = (global_nr_open+sizeof(long)-1) & ~(sizeof(long)*8-1);
+ }
+ return ret;
}
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/proc/fd.c linux-wrk/fs/proc/fd.c
--- /u2/unpack/linux-clean/fs/proc/fd.c Sat Nov 9 18:31:55 1996
+++ linux-wrk/fs/proc/fd.c Mon Jan 6 11:04:26 1997
@@ -58,7 +58,7 @@
unsigned int ino, pid, fd, c;
struct task_struct * p;
struct super_block * sb;
- int i;
+ int i;

*result = NULL;
ino = dir->i_ino;
@@ -106,8 +106,12 @@
if (!pid || i >= NR_TASKS)
return -ENOENT;

- if (fd >= NR_OPEN || !p->files->fd[fd] || !p->files->fd[fd]->f_inode)
- return -ENOENT;
+ {
+ struct file *f;
+
+ if (!(f = file_from_fd(fd)) || !(f->f_inode))
+ return -ENOENT;
+ }

ino = (pid << 16) + (PROC_PID_FD_DIR << 8) + fd;

@@ -151,7 +155,7 @@
return 0;
}

- for (fd -= 2 ; fd < NR_OPEN; fd++, filp->f_pos++) {
+ for (fd -= 2 ; fd < p->files->nr_open; fd++, filp->f_pos++) {
if (!p->files)
break;
if (!p->files->fd[fd] || !p->files->fd[fd]->f_inode)
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/proc/inode.c linux-wrk/fs/proc/inode.c
--- /u2/unpack/linux-clean/fs/proc/inode.c Sun Jan 5 22:32:31 1997
+++ linux-wrk/fs/proc/inode.c Mon Jan 6 11:04:26 1997
@@ -250,18 +250,21 @@
return;
}
switch (ino >> 8) {
- case PROC_PID_FD_DIR:
+ case PROC_PID_FD_DIR: {
+ struct file *filp;
+
ino &= 0xff;
- if (ino >= NR_OPEN || !p->files->fd[ino])
- return;
+ if (!(filp = file_from_fd(ino)))
+ return;
inode->i_op = &proc_link_inode_operations;
inode->i_size = 64;
inode->i_mode = S_IFLNK;
- if (p->files->fd[ino]->f_mode & 1)
+ if (filp->f_mode & 1)
inode->i_mode |= S_IRUSR | S_IXUSR;
- if (p->files->fd[ino]->f_mode & 2)
+ if (filp->f_mode & 2)
inode->i_mode |= S_IWUSR | S_IXUSR;
return;
+ }
}
return;
}
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/proc/link.c linux-wrk/fs/proc/link.c
--- /u2/unpack/linux-clean/fs/proc/link.c Sat Nov 9 18:31:55 1996
+++ linux-wrk/fs/proc/link.c Mon Jan 6 11:04:26 1997
@@ -121,8 +121,10 @@
if (!p->files)
break;
ino &= 0xff;
- if (ino < NR_OPEN && p->files->fd[ino]) {
- new_inode = p->files->fd[ino]->f_inode;
+ {
+ struct file *f;
+ if ((f = file_from_fd(ino)) != NULL)
+ new_inode = f->f_inode;
}
break;
}
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/read_write.c linux-wrk/fs/read_write.c
--- /u2/unpack/linux-clean/fs/read_write.c Sun Jan 5 22:31:42 1997
+++ linux-wrk/fs/read_write.c Mon Jan 6 11:04:26 1997
@@ -61,9 +61,8 @@
struct inode * inode;

retval = -EBADF;
- if (fd >= NR_OPEN ||
- !(file = current->files->fd[fd]) ||
- !(inode = file->f_inode))
+ if (!(file = file_from_fd(fd)) ||
+ !(inode = file->f_inode))
goto bad;
retval = -EINVAL;
if (origin > 2)
@@ -83,8 +82,7 @@
long long offset;

retval = -EBADF;
- if (fd >= NR_OPEN ||
- !(file = current->files->fd[fd]) ||
+ if (!(file = file_from_fd(fd)) ||
!(inode = file->f_inode))
goto bad;
retval = -EINVAL;
@@ -260,7 +258,7 @@
struct file * file;
struct inode * inode;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode = file->f_inode))
+ if (!(file = file_from_fd(fd)) || !(inode = file->f_inode))
return -EBADF;
if (!(file->f_mode & 1))
return -EBADF;
@@ -273,7 +271,7 @@
struct file * file;
struct inode * inode;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]) || !(inode = file->f_inode))
+ if (!(file = file_from_fd(fd)) || !(inode = file->f_inode))
return -EBADF;
if (!(file->f_mode & 2))
return -EBADF;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/readdir.c linux-wrk/fs/readdir.c
--- /u2/unpack/linux-clean/fs/readdir.c Sat Nov 9 18:31:56 1996
+++ linux-wrk/fs/readdir.c Mon Jan 6 11:04:26 1997
@@ -59,7 +59,7 @@
struct file * file;
struct readdir_callback buf;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
+ if (!(file = file_from_fd(fd)))
return -EBADF;
if (!file->f_op || !file->f_op->readdir)
return -ENOTDIR;
@@ -123,7 +123,7 @@
struct getdents_callback buf;
int error;

- if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
+ if (!(file = file_from_fd(fd)))
return -EBADF;
if (!file->f_op || !file->f_op->readdir)
return -ENOTDIR;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/select.c linux-wrk/fs/select.c
--- /u2/unpack/linux-clean/fs/select.c Sun Jan 5 22:33:02 1997
+++ linux-wrk/fs/select.c Sun Jan 19 07:38:33 1997
@@ -8,6 +8,10 @@
* COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
* flag set in its personality we do *not* modify the given timeout
* parameter to reflect time remaining.
+ * 4 Jan 1997 Andi Kleen, <andi@mlm.extern.lrz-muenchen.de>
+ * Added support for a dynamic sized fd_set.
+ * Removed all verify_area() calls.
+ *
*/

#include <linux/types.h>
@@ -21,9 +25,11 @@
#include <linux/errno.h>
#include <linux/personality.h>
#include <linux/mm.h>
+#include <linux/malloc.h>

#include <asm/uaccess.h>
#include <asm/system.h>
+#include <asm/page.h>

#define ROUND_UP(x,y) (((x)+(y)-1)/(y))

@@ -84,30 +90,21 @@
: \
(flag != SEL_EX))

-/*
- * Due to kernel stack usage, we use a _limited_ fd_set type here, and once
- * we really start supporting >256 file descriptors we'll probably have to
- * allocate the kernel fd_set copies dynamically.. (The kernel select routines
- * are careful to touch only the defined low bits of any fd_set pointer, this
- * is important for performance too).
- */
-typedef unsigned long limited_fd_set[NR_OPEN/(8*(sizeof(unsigned long)))];
+typedef unsigned long * fds_ptr;
+
+/* assumes sizeof(type) is a power of 2 */
+#define roundbit(n, type) (((n) + sizeof(type)*8 - 1) & ~(sizeof(type)*8-1))

-typedef struct {
- limited_fd_set in, out, ex;
- limited_fd_set res_in, res_out, res_ex;
-} fd_set_buffer;
-
-#define __IN(in) (in)
-#define __OUT(in) (in + sizeof(limited_fd_set)/sizeof(unsigned long))
-#define __EX(in) (in + 2*sizeof(limited_fd_set)/sizeof(unsigned long))
-#define __RES_IN(in) (in + 3*sizeof(limited_fd_set)/sizeof(unsigned long))
-#define __RES_OUT(in) (in + 4*sizeof(limited_fd_set)/sizeof(unsigned long))
-#define __RES_EX(in) (in + 5*sizeof(limited_fd_set)/sizeof(unsigned long))
+#define __IN(fds,sz) (fds)
+#define __OUT(fds,sz) ((unsigned long *) ((char *)(fds) + sz))
+#define __EX(fds,sz) ((unsigned long *) ((char *)(fds) + 2*sz))
+#define __RES_IN(fds,sz) ((unsigned long *) ((char *)(fds) + 3*sz))
+#define __RES_OUT(fds,sz) ((unsigned long *) ((char *)(fds) + 4*sz))
+#define __RES_EX(fds,sz) ((unsigned long *) ((char *)(fds) + 5*sz))

-#define BITS(in) (*__IN(in)|*__OUT(in)|*__EX(in))
+#define BITS(fds,sz) (*__IN(fds,sz)|*__OUT(fds,sz)|*__EX(fds,sz))

-static int max_select_fd(unsigned long n, fd_set_buffer *fds)
+static int max_select_fd(unsigned int sz, unsigned long n, fds_ptr fds)
{
unsigned long *open_fds, *in;
unsigned long set;
@@ -116,11 +113,11 @@
/* handle last in-complete long-word first */
set = ~(~0UL << (n & (__NFDBITS-1)));
n /= __NFDBITS;
- open_fds = current->files->open_fds.fds_bits+n;
- in = fds->in+n;
+ open_fds = current->files->open_fds->fds_bits+n;
+ in = fds+n;
max = 0;
if (set) {
- set &= BITS(in);
+ set &= BITS(in,sz);
if (set) {
if (!(set & ~*open_fds))
goto get_max;
@@ -128,10 +125,10 @@
}
}
while (n) {
- in--;
+ in--;
open_fds--;
n--;
- set = BITS(in);
+ set = BITS(in,sz);
if (!set)
continue;
if (set & ~*open_fds)
@@ -154,14 +151,14 @@
#define ISSET(i,m) (((i)&*(m)) != 0)
#define SET(i,m) (*(m) |= (i))

-static int do_select(int n, fd_set_buffer *fds)
+static int do_select(int n, int sz, fds_ptr fds)
{
int retval;
select_table wait_table, *wait;
struct select_table_entry *entry;
int i;

- retval = max_select_fd(n, fds);
+ retval = max_select_fd(sz, n, fds);
if (retval < 0)
goto out;
n = retval;
@@ -177,23 +174,23 @@
current->state = TASK_INTERRUPTIBLE;
for (i = 0 ; i < n ; i++,fd++) {
unsigned long bit = BIT(i);
- unsigned long *in = MEM(i,fds->in);
+ unsigned long *in = MEM(i,fds);
struct file * file = *fd;

if (!file)
continue;
- if (ISSET(bit,__IN(in)) && check(SEL_IN,wait,file)) {
- SET(bit, __RES_IN(in));
+ if (ISSET(bit,__IN(in, sz)) && check(SEL_IN,wait,file)) {
+ SET(bit, __RES_IN(in,sz));
retval++;
wait = NULL;
}
- if (ISSET(bit,__OUT(in)) && check(SEL_OUT,wait,file)) {
- SET(bit, __RES_OUT(in));
+ if (ISSET(bit,__OUT(in,sz)) && check(SEL_OUT,wait,file)) {
+ SET(bit, __RES_OUT(in,sz));
retval++;
wait = NULL;
}
- if (ISSET(bit,__EX(in)) && check(SEL_EX,wait,file)) {
- SET(bit, __RES_EX(in));
+ if (ISSET(bit,__EX(in,sz)) && check(SEL_EX,wait,file)) {
+ SET(bit, __RES_EX(in,sz));
retval++;
wait = NULL;
}
@@ -210,46 +207,28 @@
return retval;
}

-/*
- * We do a VERIFY_WRITE here even though we are only reading this time:
- * we'll write to it eventually..
- *
- * Use "int" accesses to let user-mode fd_set's be int-aligned.
- */
static int __get_fd_set(unsigned long nr, int * fs_pointer, int * fdset)
{
/* round up nr to nearest "int" */
- nr = (nr + 8*sizeof(int)-1) / (8*sizeof(int));
+ nr = (nr + 8*sizeof(int)-1) / 8;
if (fs_pointer) {
- int error = verify_area(VERIFY_WRITE,fs_pointer,nr*sizeof(int));
- if (!error) {
- while (nr) {
- get_user(*fdset, fs_pointer);
- nr--;
- fs_pointer++;
- fdset++;
- }
- }
- return error;
- }
- while (nr) {
- *fdset = 0;
- nr--;
- fdset++;
+ if (copy_from_user(fdset, fs_pointer, nr))
+ return -EFAULT;
+ return 0;
+ } else {
+ memset(fdset, 0, nr);
+ return 0;
}
- return 0;
}

-static void __set_fd_set(long nr, int * fs_pointer, int * fdset)
+static int __set_fd_set(unsigned long nr, int * fs_pointer, long * fdset)
{
if (!fs_pointer)
- return;
- while (nr >= 0) {
- put_user(*fdset, fs_pointer);
- nr -= 8 * sizeof(int);
- fdset++;
- fs_pointer++;
- }
+ return 0;
+ /* round up nr to nearest "int" */
+ nr = (nr + 8*sizeof(int)-1) / 8;
+ nr /= 8;
+ return __copy_to_user(fs_pointer, fdset, nr) ? -EFAULT : 0;
}

/* We can do long accesses here, kernel fdsets are always long-aligned */
@@ -262,6 +241,47 @@
}
}

+/* XXX use a cached page like namei.c:getname(). */
+static inline fds_ptr alloc_fd_set_buffer(unsigned int sz)
+{
+ fds_ptr m;
+
+ sz *= 6;
+ if (sz <= PAGE_SIZE)
+ m = (fds_ptr)__get_free_page(GFP_KERNEL);
+ else
+ m = kmalloc(sz, GFP_KERNEL);
+ return m;
+}
+
+static inline void free_fd_set_buffer(unsigned int sz, fds_ptr fds)
+{
+ if (6*sz <= PAGE_SIZE)
+ free_page((unsigned long)fds);
+ else
+ kfree(fds);
+}
+
+void *__builtin_alloca(size_t);
+
+#define alloc_fd_set(n, sz) \
+({ fds_ptr fds; \
+ if (n > SMALL_NR_OPEN) { \
+ fds = alloc_fd_set_buffer(sz); \
+ if (!fds) \
+ return -ENOMEM; \
+ } else { \
+ fds = (fds_ptr)__builtin_alloca(6*sz); \
+ } \
+ fds; \
+})
+
+static inline void free_fd_set(unsigned int n, unsigned int sz, fds_ptr fds)
+{
+ if (n > SMALL_NR_OPEN)
+ free_fd_set_buffer(sz, fds);
+}
+
/*
* Note a few subtleties: we use "long" for the dummy, not int, and we do a
* subtract by 1 on the nr of file descriptors. The former is better for
@@ -289,58 +309,69 @@
asmlinkage int sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
{
int error;
- fd_set_buffer fds;
+ fds_ptr fds;
unsigned long timeout;
-
+ unsigned int sz;
+
error = -EINVAL;
if (n < 0)
- goto out;
- if (n > NR_OPEN)
- n = NR_OPEN;
- if ((error = get_fd_set(n, inp, &fds.in)) ||
- (error = get_fd_set(n, outp, &fds.out)) ||
- (error = get_fd_set(n, exp, &fds.ex))) goto out;
+ goto finalout;
+ if (n > current->files->nr_open)
+ n = current->files->nr_open;
+
+ sz = roundbit(n, unsigned long)/8;
+ fds = alloc_fd_set(n, sz);
+
+ if ((error = get_fd_set(n, inp, __IN(fds,sz))) ||
+ (error = get_fd_set(n, outp, __OUT(fds,sz))) ||
+ (error = get_fd_set(n, exp, __EX(fds,sz)))) goto out;
timeout = ~0UL;
if (tvp) {
- error = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp));
- if (error)
- goto out;
- get_user(timeout, &tvp->tv_usec);
+ unsigned long tmp;
+
+ if ((error = get_user(timeout, &tvp->tv_usec)) ||
+ (error = get_user(tmp, &tvp->tv_sec)))
+ goto out;
timeout = ROUND_UP(timeout,(1000000/HZ));
- {
- unsigned long tmp;
- get_user(tmp, &tvp->tv_sec);
- timeout += tmp * (unsigned long) HZ;
- }
+ timeout += tmp * (unsigned long) HZ;
+ /* XXX Note that timeout might still be too
+ small because of rounding errors to 1000000/HZ.
+ I'm not sure about the right fix though. */
if (timeout)
timeout += jiffies + 1;
}
- zero_fd_set(n, &fds.res_in);
- zero_fd_set(n, &fds.res_out);
- zero_fd_set(n, &fds.res_ex);
+ zero_fd_set(n, __RES_IN(fds,sz));
+ zero_fd_set(n, __RES_OUT(fds,sz));
+ zero_fd_set(n, __RES_EX(fds,sz));
current->timeout = timeout;
- error = do_select(n, &fds);
+ error = do_select(n, sz, fds);
timeout = current->timeout - jiffies - 1;
current->timeout = 0;
if ((long) timeout < 0)
timeout = 0;
if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
- put_user(timeout/HZ, &tvp->tv_sec);
- timeout %= HZ;
- timeout *= (1000000/HZ);
- put_user(timeout, &tvp->tv_usec);
+ if (put_user(timeout/HZ, &tvp->tv_sec) ||
+ put_user((timeout%HZ)*(1000000/HZ), &tvp->tv_usec)) {
+ error = -EFAULT;
+ goto out;
+ }
}
- if (error < 0)
- goto out;
+ if (error < 0)
+ goto out;
if (!error) {
error = -ERESTARTNOHAND;
if (current->signal & ~current->blocked)
goto out;
error = 0;
}
- set_fd_set(n, inp, &fds.res_in);
- set_fd_set(n, outp, &fds.res_out);
- set_fd_set(n, exp, &fds.res_ex);
+
+ if (set_fd_set(n, inp, __RES_IN(fds,sz)) ||
+ set_fd_set(n, outp, __RES_OUT(fds,sz)) ||
+ set_fd_set(n, exp, __RES_EX(fds,sz))) {
+ error = -EFAULT;
+ }
out:
+ free_fd_set(n, sz, fds);
+finalout:
return error;
}
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/fs/smbfs/inode.c linux-wrk/fs/smbfs/inode.c
--- /u2/unpack/linux-clean/fs/smbfs/inode.c Sun Jan 19 07:32:50 1997
+++ linux-wrk/fs/smbfs/inode.c Sun Jan 19 06:48:14 1997
@@ -220,7 +220,7 @@
return NULL;
}
fd = data.fd;
- if (fd >= NR_OPEN || !(filp = current->files->fd[fd]))
+ if (!(filp = file_from_fd(fd)))
{
printk("smb_read_super: invalid file descriptor\n");
sb->s_dev = 0;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/include/asm-i386/system.h linux-wrk/include/asm-i386/system.h
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/include/linux/file.h linux-wrk/include/linux/file.h
--- /u2/unpack/linux-clean/include/linux/file.h Tue Aug 20 11:14:31 1996
+++ linux-wrk/include/linux/file.h Mon Jan 6 11:04:29 1997
@@ -4,7 +4,7 @@
extern inline struct file * fget(unsigned long fd)
{
struct file * file = NULL;
- if (fd < NR_OPEN) {
+ if (fd < current->files->nr_open) {
file = current->files->fd[fd];
if (file)
file->f_count++;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/include/linux/fs.h linux-wrk/include/linux/fs.h
--- /u2/unpack/linux-clean/include/linux/fs.h Sun Jan 19 07:32:52 1997
+++ linux-wrk/include/linux/fs.h Sun Jan 19 06:58:21 1997
@@ -17,18 +17,11 @@
#include <linux/ioctl.h>

/*
- * It's silly to have NR_OPEN bigger than NR_FILE, but I'll fix
- * that later. Anyway, now the file code is no longer dependent
- * on bitmaps in unsigned longs, but uses the new fd_set structure..
- *
* Some programs (notably those using select()) may have to be
* recompiled to take full advantage of the new limits..
*/

/* Fixed constants first: */
-#undef NR_OPEN
-#define NR_OPEN 256
-
#define NR_SUPER 64
#define BLOCK_SIZE 1024
#define BLOCK_SIZE_BITS 10
@@ -38,6 +31,16 @@
extern int max_files, nr_files;
#define NR_INODE 3072 /* this should be bigger than NR_FILE */
#define NR_FILE 1024 /* this can well be larger on a larger system */
+
+/* this is dynamically tunable now. */
+#undef NR_OPEN
+#define NR_OPEN 1024
+extern int global_nr_open;
+
+/* fd_set size that is small enough for the kernel stack.
+ When the fd_set is bigger we allocate dynamically. Note that
+ we have to put SMALL_NR_OPEN/8*6 bytes on the kernel stack. */
+#define SMALL_NR_OPEN 1024 /* 128Bytes*6 = 768Bytes. */

#define MAY_EXEC 1
#define MAY_WRITE 2
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/include/linux/limits.h linux-wrk/include/linux/limits.h
--- /u2/unpack/linux-clean/include/linux/limits.h Sat Nov 9 18:32:22 1996
+++ linux-wrk/include/linux/limits.h Mon Jan 6 11:04:29 1997
@@ -1,7 +1,9 @@
#ifndef _LINUX_LIMITS_H
#define _LINUX_LIMITS_H

-#define NR_OPEN 256
+#ifndef NR_OPEN /* user can override it */
+#define NR_OPEN 1024
+#endif

#define NGROUPS_MAX 32 /* supplemental group IDs are available */
#define ARG_MAX 131072 /* # bytes of args + environ for exec() */
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/include/linux/posix_types.h linux-wrk/include/linux/posix_types.h
--- /u2/unpack/linux-clean/include/linux/posix_types.h Sat Nov 9 18:30:55 1996
+++ linux-wrk/include/linux/posix_types.h Sat Jan 18 22:57:20 1997
@@ -16,6 +16,8 @@
* beyond that you'll have to change this too. But 1024 fd's seem to be
* enough even for such "real" unices like OSF/1, so hopefully this is
* one limit that doesn't have to be changed [again].
+ * - Note that the statement above is no longer true: you can
+ * redefine NR_OPEN now and recompile your program.
*
* Note that POSIX wants the FD_CLEAR(fd,fdsetp) defines to be in
* <sys/time.h> (and thus <linux/time.h>) - but this is a more logical
@@ -30,10 +32,17 @@
#define __NFDBITS (8 * sizeof(unsigned long))

#undef __FD_SETSIZE
-#define __FD_SETSIZE 1024
+
+#undef __FD_SETSIZE
+/* User can define __USER_FDSETSIZE. */
+#ifdef __USER_FDSETSIZE
+#define __FD_SETSIZE __USER_FD_SETSIZE
+#else
+#define __FD_SETSIZE 1024
+#endif

#undef __FDSET_LONGS
-#define __FDSET_LONGS (__FD_SETSIZE/__NFDBITS)
+#define __FDSET_LONGS (__FD_SETSIZE/__NFDBITS)

#undef __FDELT
#define __FDELT(d) ((d) / __NFDBITS)
@@ -42,7 +51,7 @@
#define __FDMASK(d) (1UL << ((d) % __NFDBITS))

typedef struct fd_set {
- unsigned long fds_bits [__FDSET_LONGS];
+ unsigned long fds_bits [__FD_SETSIZE/__NFDBITS];
} __kernel_fd_set;

/* Type of a signal handler. */
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/include/linux/sched.h linux-wrk/include/linux/sched.h
--- /u2/unpack/linux-clean/include/linux/sched.h Sun Jan 5 22:33:06 1997
+++ linux-wrk/include/linux/sched.h Sun Jan 19 06:58:22 1997
@@ -113,16 +113,40 @@
/* Open file table structure */
struct files_struct {
int count;
- fd_set close_on_exec;
- fd_set open_fds;
- struct file * fd[NR_OPEN];
+ int nr_open;
+ fd_set *close_on_exec;
+ fd_set *open_fds;
+ struct file * fd[0]; /* it's really [nr_open] */
+ /* the close_on_exec and open_fds fd_sets follow */
+ /* XXX it could be better for the cache to move
+ open_fds in front of fd[]. I have to investigate this. */
};

+/*
+ * We could align the open_fds on a 64/128 bit boundary.
+ * Maybe this'll help on the sparc or the alpha.
+ * With the current power-of-two kmalloc() we have plenty of space
+ * anyway. We could even change this dynamically with the nr_open size.
+ * But make sure it always works with the constant initialiser.
+ */
+static inline void init_filestruct(struct files_struct *files, int nr_open)
+{
+ files->nr_open = nr_open;
+ files->close_on_exec = (fd_set*) &(files->fd[nr_open]);
+ files->open_fds = (fd_set*) ((char*)files->close_on_exec + nr_open/8);
+}
+
+#define FD_SET_INDEX sizeof(fd_set)/sizeof(struct file *)
+#define FILES_SIZE(nr_open) (sizeof(struct files_struct) + \
+ (nr_open)/4 + nr_open * sizeof(struct file *))
+
+/* you have to call init_filestruct() before using this. */
#define INIT_FILES { \
1, \
- { { 0, } }, \
- { { 0, } }, \
- { NULL, } \
+ NR_OPEN, \
+ NULL, /* this field has to be initialised. */ \
+ NULL, /* ditto. */ \
+ { [NR_OPEN] NULL, [NR_OPEN + 2*FD_SET_INDEX + 1] NULL } \
}

struct fs_struct {
@@ -320,6 +344,7 @@

#include <asm/current.h>

+extern int global_nr_open;
extern unsigned long volatile jiffies;
extern unsigned long itimer_ticks;
extern unsigned long itimer_next;
@@ -388,11 +413,11 @@
*/
extern inline struct file *file_from_fd(const unsigned int fd)
{
-
- if (fd >= NR_OPEN)
+ struct files_struct *f = current->files;
+ if (fd >= f->nr_open)
return NULL;
/* either valid or null */
- return current->files->fd[fd];
+ return f->fd[fd];
}

/*
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/include/linux/sysctl.h linux-wrk/include/linux/sysctl.h
--- /u2/unpack/linux-clean/include/linux/sysctl.h Sun Jan 5 22:32:11 1997
+++ linux-wrk/include/linux/sysctl.h Mon Jan 6 11:04:29 1997
@@ -62,6 +62,7 @@
#define KERN_JAVA_INTERPRETER 19 /* path to Java(tm) interpreter */
#define KERN_JAVA_APPLETVIEWER 20 /* path to Java(tm) appletviewer */
#define KERN_SPARC_REBOOT 21 /* reboot command on Sparc */
+#define KERN_NROPEN 22

/* CTL_VM names: */
#define VM_SWAPCTL 1 /* struct: Set vm swapping control */
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/kernel/exit.c linux-wrk/kernel/exit.c
--- /u2/unpack/linux-clean/kernel/exit.c Sun Jan 5 22:33:06 1997
+++ linux-wrk/kernel/exit.c Mon Jan 6 11:04:29 1997
@@ -400,10 +400,10 @@

j = 0;
for (;;) {
- unsigned long set = files->open_fds.fds_bits[j];
+ unsigned long set = files->open_fds->fds_bits[j];
i = j * __NFDBITS;
j++;
- if (i >= NR_OPEN)
+ if (i >= files->nr_open)
break;
while (set) {
if (set & 1)
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/kernel/fork.c linux-wrk/kernel/fork.c
--- /u2/unpack/linux-clean/kernel/fork.c Sat Jan 18 22:06:38 1997
+++ linux-wrk/kernel/fork.c Sat Jan 18 22:17:21 1997
@@ -166,6 +166,7 @@
int i;
struct files_struct *oldf, *newf;
struct file **old_fds, **new_fds;
+ unsigned int nr_open;

oldf = current->files;
if (clone_flags & CLONE_FILES) {
@@ -173,24 +174,42 @@
return 0;
}

- newf = kmalloc(sizeof(*newf), GFP_KERNEL);
+ nr_open = global_nr_open;
+ if (nr_open < oldf->nr_open)
+ nr_open = oldf->nr_open;
+
+ newf = kmalloc(FILES_SIZE(nr_open), GFP_KERNEL);
tsk->files = newf;
if (!newf)
return -1;
-
+
+ init_filestruct(newf, nr_open);
newf->count = 1;
- newf->close_on_exec = oldf->close_on_exec;
- newf->open_fds = oldf->open_fds;
+ if (nr_open > oldf->nr_open) {
+ /* This is an unusual case so it could be moved out of the
+ * main code path. Best would be an early
+ * "newf->nr_open != oldf->nr_open" check and move the whole
+ * fixup out of the main code path.
+ */
+ memset(newf->close_on_exec,0,nr_open/8);
+ memset(newf->open_fds,0,nr_open/8);
+ }
+ memcpy(newf->close_on_exec, oldf->close_on_exec, oldf->nr_open/8);
+ memcpy(newf->open_fds, oldf->open_fds, oldf->nr_open/8);

old_fds = oldf->fd;
new_fds = newf->fd;
- for (i = NR_OPEN; i != 0; i--) {
+ for (i = oldf->nr_open; i != 0; i--) {
struct file * f = *old_fds;
old_fds++;
*new_fds = f;
new_fds++;
if (f)
f->f_count++;
+ }
+ for (i = newf->nr_open - oldf->nr_open; i > 0; i--) {
+ *new_fds = NULL;
+ new_fds++;
}
return 0;
}
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/kernel/sched.c linux-wrk/kernel/sched.c
--- /u2/unpack/linux-clean/kernel/sched.c Sun Jan 5 22:31:05 1997
+++ linux-wrk/kernel/sched.c Mon Jan 6 11:04:29 1997
@@ -1477,6 +1477,7 @@
for(cpu = 0; cpu < NR_CPUS; cpu++)
current_set[cpu] = &init_task;
#endif
+ init_filestruct(&init_files, NR_OPEN);
init_bh(TIMER_BH, timer_bh);
init_bh(TQUEUE_BH, tqueue_bh);
init_bh(IMMEDIATE_BH, immediate_bh);
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/kernel/sys.c linux-wrk/kernel/sys.c
--- /u2/unpack/linux-clean/kernel/sys.c Sun Jan 5 22:32:35 1997
+++ linux-wrk/kernel/sys.c Mon Jan 6 11:04:29 1997
@@ -893,7 +893,8 @@
!suser())
return -EPERM;
if (resource == RLIMIT_NOFILE) {
- if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
+ if (new_rlim.rlim_cur > current->files->nr_open ||
+ new_rlim.rlim_max > current->files->nr_open)
return -EPERM;
}
*old_rlim = new_rlim;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/kernel/sysctl.c linux-wrk/kernel/sysctl.c
--- /u2/unpack/linux-clean/kernel/sysctl.c Sun Jan 5 22:32:12 1997
+++ linux-wrk/kernel/sysctl.c Mon Jan 6 11:04:29 1997
@@ -99,6 +99,7 @@
static int do_securelevel_strategy (ctl_table *, int *, int, void *, size_t *,
void *, size_t, void **);

+extern int sysctl_proc_nropen(ctl_table *,int,struct file *, void *, size_t *);
extern char binfmt_java_interpreter[], binfmt_java_appletviewer[];

#ifdef __sparc__
@@ -133,6 +134,8 @@
0444, NULL, &proc_dointvec},
{KERN_MAXFILE, "file-max", &max_files, sizeof(int),
0644, NULL, &proc_dointvec},
+ {KERN_NROPEN, "nr-open", &global_nr_open, sizeof(int),
+ 0644, NULL, &sysctl_proc_nropen},
{KERN_SECURELVL, "securelevel", &securelevel, sizeof(int),
0444, NULL, &proc_dointvec, (ctl_handler *)&do_securelevel_strategy},
{KERN_PANIC, "panic", &panic_timeout, sizeof(int),
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/net/core/scm.c linux-wrk/net/core/scm.c
--- /u2/unpack/linux-clean/net/core/scm.c Sat Jan 18 22:06:39 1997
+++ linux-wrk/net/core/scm.c Mon Jan 6 11:04:29 1997
@@ -89,15 +89,11 @@

for (i=0; i< num; i++)
{
- int fd;
-
- fd = fdp[i];
-
- if (fd < 0 || fd >= NR_OPEN)
- return -EBADF;
- if (current->files->fd[fd]==NULL)
- return -EBADF;
- fpp[i] = current->files->fd[fd];
+ struct file *f;
+
+ if (!(f = file_from_fd(fdp[i])))
+ return -EBADF;
+ fpp[i] = f;
}

/* add another reference to these files */
@@ -177,9 +173,8 @@

p->sock = NULL;
if (acc_fd != -1) {
- if (acc_fd < 0 || acc_fd >= NR_OPEN ||
- (file=current->files->fd[acc_fd])==NULL)
- return -EBADF;
+ if (!(file = file_from_fd(acc_fd)))
+ return -EBADF;
if (!file->f_inode || !file->f_inode->i_sock)
return -ENOTSOCK;
p->sock = &file->f_inode->u.socket_i;
diff -u --recursive -x *.o -x .* -x *.orig -x *drivers/scsi* /u2/unpack/linux-clean/net/socket.c linux-wrk/net/socket.c
--- /u2/unpack/linux-clean/net/socket.c Sun Jan 19 07:32:53 1997
+++ linux-wrk/net/socket.c Sun Jan 19 06:48:16 1997
@@ -230,7 +230,7 @@
struct file *file;
struct inode *inode;

- if (fd < 0 || fd >= NR_OPEN || !(file = current->files->fd[fd]))
+ if (!(file = file_from_fd(fd)))
{
*err = -EBADF;
return NULL;

--
|andi@mlm.extern.lrz-muenchen.de     Nonsense is better than no sense at all.
|                                        -NoMeansNo,0-1=2