Re: More file descriptors are needed

Bill Hawes (whawes@star.net)
Tue, 22 Sep 1998 09:15:43 -0400


This is a multi-part message in MIME format.
--------------58CBC9E7F1D69E0158A0FC8D
Content-Type: text/plain; charset=iso-8859-1
Content-Transfer-Encoding: 8bit

김승학 wrote:

> I have to get as many as 3000 open files, or fds.
> so, I modified NR_OPEN, NR_FILE, FD_SETSIZE to 3000 and build the kernel.
> When I reboot the system, only single user mode is allowed.
> I think some modules such as aic7xxxx, 3c59x are not loaded at all.
>
> I just read that a Bill Hawes' patch is for so many fds.
> Let me know where I can get the patch.
> If you post the patch, I'd really appreciate it.

My patch for dynamic fd arrays is intended more to save kernel memory
and speed up forking, but should work for extending the NR_OPEN limit as
well. (But there may be other changes to the kernel necessary beyond
changing NR_OPEN.) I've attached a copy of the dynamic fd patch.

There are also other patches intended only for extending the fd limit.
Note that you'll probably need to recompile your libraries (libc or
glibc) as well, as these have the NR_OPEN value hard-coded in some
places.

Regards,
Bill
--------------58CBC9E7F1D69E0158A0FC8D
Content-Type: text/plain; charset=us-ascii; name="fork_files120-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="fork_files120-patch"

--- linux-2.1.120/include/linux/sched.h.old Sat Sep 5 10:52:54 1998
+++ linux-2.1.120/include/linux/sched.h Sat Sep 5 11:11:30 1998
@@ -124,7 +124,11 @@

asmlinkage void schedule(void);

-
+/*
+ * The default fd array needs to be at least BITS_PER_LONG,
+ * as this is the granularity returned by copy_fdset().
+ */
+#define NR_OPEN_DEFAULT BITS_PER_LONG
/*
* Open file table structure
*/
@@ -134,6 +138,7 @@
struct file ** fd; /* current fd array */
fd_set close_on_exec;
fd_set open_fds;
+ struct file * fd_array[NR_OPEN_DEFAULT];
};

#define INIT_FILES { \
@@ -141,7 +146,8 @@
NR_OPEN, \
&init_fd_array[0], \
{ { 0, } }, \
- { { 0, } } \
+ { { 0, } }, \
+ { NULL, } \
}

struct fs_struct {
@@ -605,6 +611,13 @@
atomic_inc(&mm->count);
}
extern void mmput(struct mm_struct *);
+
+/*
+ * Routines for handling the fd arrays
+ */
+extern struct file ** alloc_fd_array(int);
+extern int expand_fd_array(struct files_struct *);
+extern void free_fd_array(struct file **, int);

extern int copy_thread(int, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
extern void flush_thread(void);
--- linux-2.1.120/kernel/fork.c.old Sat Sep 5 10:52:55 1998
+++ linux-2.1.120/kernel/fork.c Sat Sep 5 11:01:22 1998
@@ -379,11 +379,78 @@
return __copy_fdset(dst->fds_bits, src->fds_bits);
}

+/*
+ * Allocate an fd array, using get_free_page() if possible.
+ * Note: the array isn't cleared at allocation time.
+ */
+struct file ** alloc_fd_array(int num)
+{
+ struct file **new_fds;
+ int size = num * sizeof(struct file *);
+
+ if (size == PAGE_SIZE)
+ new_fds = (struct file **) __get_free_page(GFP_KERNEL);
+ else
+ new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
+ return new_fds;
+}
+
+/*
+ * Expand the fd array in the files_struct.
+ */
+int expand_fd_array(struct files_struct *files)
+{
+ struct file **new_fds, **old_fds;
+ int error, nfds;
+
+ error = -EMFILE;
+ if (files->max_fds >= NR_OPEN)
+ goto out;
+
+ /* Expand to the max in one step */
+ nfds = NR_OPEN;
+
+ error = -ENOMEM;
+ new_fds = alloc_fd_array(nfds);
+ if (!new_fds)
+ goto out;
+
+ /* Copy the existing array and install the new pointer */
+ if (nfds > files->max_fds) {
+ int i = files->max_fds;
+ int size = (nfds - i) * sizeof(struct file *);
+
+ old_fds = files->fd;
+ files->fd = new_fds;
+ files->max_fds = nfds;
+ while (i--)
+ *new_fds++ = *old_fds++;
+ /* clear the remainder of the array */
+ memset(new_fds, 0, size);
+ } else {
+ /* Somebody expanded the array while we slept ... */
+ free_fd_array(new_fds, nfds);
+ }
+ error = 0;
+out:
+ return error;
+}
+
+void free_fd_array(struct file **array, int num)
+{
+ int size = num * sizeof(struct file *);
+
+ if (size == PAGE_SIZE)
+ free_page((unsigned long) array);
+ else
+ kfree(array);
+}
+
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
struct files_struct *oldf, *newf;
struct file **old_fds, **new_fds;
- int size, i, error = 0;
+ int nfds, size, i, error = 0;

/*
* A background process may not have any files ...
@@ -403,24 +470,35 @@
if (!newf)
goto out;

- /*
- * Allocate the fd array, using get_free_page() if possible.
- * Eventually we want to make the array size variable ...
- */
- size = NR_OPEN * sizeof(struct file *);
- if (size == PAGE_SIZE)
- new_fds = (struct file **) __get_free_page(GFP_KERNEL);
- else
- new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
- if (!new_fds)
- goto out_release;
-
atomic_set(&newf->count, 1);
- newf->max_fds = NR_OPEN;
- newf->fd = new_fds;
newf->close_on_exec = oldf->close_on_exec;
i = copy_fdset(&newf->open_fds, &oldf->open_fds);

+#if 1
+ /* Do a sanity check ... */
+ if (i > oldf->max_fds)
+ printk("copy_files: pid %d, open files %d exceeds max %d!\n",
+ current->pid, i, oldf->max_fds);
+#endif
+
+ /*
+ * Check whether we need to allocate a larger fd array.
+ * Note: we're not a clone task, so the open count won't
+ * change.
+ */
+ new_fds = &newf->fd_array[0];
+ nfds = NR_OPEN_DEFAULT;
+ if (i > nfds) {
+ nfds = NR_OPEN;
+ new_fds = alloc_fd_array(nfds);
+ if (!new_fds)
+ goto out_release;
+ }
+ newf->max_fds = nfds;
+ newf->fd = new_fds;
+
+ /* compute the remainder to be cleared */
+ size = (nfds - i) * sizeof(struct file *);
old_fds = oldf->fd;
for (; i != 0; i--) {
struct file *f = *old_fds++;
@@ -430,7 +508,7 @@
new_fds++;
}
/* This is long word aligned thus could use a optimized version */
- memset(new_fds, 0, (char *)newf->fd + size - (char *)new_fds);
+ memset(new_fds, 0, size);

tsk->files = newf;
error = 0;
--- linux-2.1.120/kernel/exit.c.old Tue Sep 1 21:39:19 1998
+++ linux-2.1.120/kernel/exit.c Sat Sep 5 11:01:22 1998
@@ -199,12 +199,10 @@
if (atomic_dec_and_test(&files->count)) {
close_files(files);
/*
- * Free the fd array as appropriate ...
+ * Free the fd array if we expanded it.
*/
- if (NR_OPEN * sizeof(struct file *) == PAGE_SIZE)
- free_page((unsigned long) files->fd);
- else
- kfree(files->fd);
+ if (files->fd != &files->fd_array[0])
+ free_fd_array(files->fd, files->max_fds);
kmem_cache_free(files_cachep, files);
}
}
--- linux-2.1.120/fs/open.c.old Tue Sep 1 21:40:19 1998
+++ linux-2.1.120/fs/open.c Sat Sep 5 11:01:22 1998
@@ -690,6 +690,7 @@
struct files_struct * files = current->files;
int fd, error;

+repeat:
error = -EMFILE;
fd = find_first_zero_bit(&files->open_fds, NR_OPEN);
/*
@@ -698,8 +699,15 @@
*/
if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
-
- /* Check here for fd > files->max_fds to do dynamic expansion */
+ /*
+ * Check whether we need to expand the fd array.
+ */
+ if (fd >= files->max_fds) {
+ error = expand_fd_array(files);
+ if (!error)
+ goto repeat;
+ goto out;
+ }

FD_SET(fd, &files->open_fds);
FD_CLR(fd, &files->close_on_exec);
--- linux-2.1.120/fs/fcntl.c.old Tue Sep 1 21:39:47 1998
+++ linux-2.1.120/fs/fcntl.c Sat Sep 5 11:01:22 1998
@@ -20,14 +20,15 @@

extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);

-static inline int dupfd(unsigned int fd, unsigned int arg)
+static inline int dupfd(unsigned int fd, unsigned int in_arg)
{
struct files_struct * files = current->files;
struct file * file;
+ unsigned int arg;
int error;

error = -EINVAL;
- if (arg >= NR_OPEN)
+ if (in_arg >= NR_OPEN)
goto out;

error = -EBADF;
@@ -35,10 +36,21 @@
if (!file)
goto out;

+repeat:
error = -EMFILE;
- arg = find_next_zero_bit(&files->open_fds, NR_OPEN, arg);
+ arg = find_next_zero_bit(&files->open_fds, NR_OPEN, in_arg);
if (arg >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out_putf;
+ /*
+ * Check whether we need to expand the fd array.
+ */
+ if (arg >= files->max_fds) {
+ error = expand_fd_array(files);
+ if (!error)
+ goto repeat;
+ goto out_putf;
+ }
+
FD_SET(arg, &files->open_fds);
FD_CLR(arg, &files->close_on_exec);
fd_install(arg, file);
@@ -58,12 +70,12 @@
lock_kernel();
if (!fcheck(oldfd))
goto out;
+ if (newfd >= NR_OPEN)
+ goto out; /* following POSIX.1 6.2.1 */
+
err = newfd;
if (newfd == oldfd)
goto out;
- err = -EBADF;
- if (newfd >= NR_OPEN)
- goto out; /* following POSIX.1 6.2.1 */

sys_close(newfd);
err = dupfd(oldfd, newfd);
@@ -119,6 +131,7 @@
filp = fget(fd);
if (!filp)
goto out;
+
err = 0;
switch (cmd) {
case F_DUPFD:
@@ -159,7 +172,6 @@
err = filp->f_owner.pid;
break;
case F_SETOWN:
- err = 0;
filp->f_owner.pid = arg;
filp->f_owner.uid = current->uid;
filp->f_owner.euid = current->euid;
@@ -179,10 +191,9 @@
break;
default:
/* sockets need a few special fcntls. */
+ err = -EINVAL;
if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
err = sock_fcntl (filp, cmd, arg);
- else
- err = -EINVAL;
break;
}
fput(filp);
--- linux-2.1.120/fs/proc/array.c.old Sat Sep 5 10:52:50 1998
+++ linux-2.1.120/fs/proc/array.c Sat Sep 5 11:01:22 1998
@@ -678,11 +678,14 @@
"Pid:\t%d\n"
"PPid:\t%d\n"
"Uid:\t%d\t%d\t%d\t%d\n"
- "Gid:\t%d\t%d\t%d\t%d\n",
+ "Gid:\t%d\t%d\t%d\t%d\n"
+ "FDSize:\t%d\n",
get_task_state(p),
- p->pid, p->p_pptr->pid,
+ p->pid,
+ p->p_pptr->pid,
p->uid, p->euid, p->suid, p->fsuid,
- p->gid, p->egid, p->sgid, p->fsgid);
+ p->gid, p->egid, p->sgid, p->fsgid,
+ p->files ? p->files->max_fds : 0);
return buffer;
}

--------------58CBC9E7F1D69E0158A0FC8D--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/