Re: general protection faults with 2.0.30 and filehandle patch

Matthias Urlichs (smurf@work.smurf.noris.de)
20 May 1997 23:52:34 +0200


Andi Kleen <andi@mind.aec.at> writes:
> Oskar Pearson <oskar@is.co.za> writes:
>
> > This is with 2.0.30 with the filehandle patch from
> > www.linux.org.za/filehandle.patch.linux (or
> > ftp://ftp.is.co.za/linux/local/filehandle.patch.linux)
> >
> You can't run an unpatched 2.0 kernel with 3000 fds per process.
> The sys_select() routine will overflow the kernel stack: it puts

Ahem. He did mention the above patch. This patch tries to fix sys_select() so
that the stack overrun will not happen.

Unfortunately, that particular patch does seem to cause trouble. I've had
more success with my own change to fs/select.c, appended below.

Index: linux/fs/select.c
===================================================================
RCS file: /usr/src/cvs/kernel/linux/fs/select.c,v
retrieving revision 1.1.1.3
retrieving revision 1.3
diff -u -r1.1.1.3 -r1.3
--- select.c 1997/01/16 07:21:46 1.1.1.3
+++ select.c 1997/05/18 23:32:12 1.3
@@ -21,6 +21,7 @@
#include <linux/errno.h>
#include <linux/personality.h>
#include <linux/mm.h>
+#include <linux/malloc.h>

#include <asm/segment.h>
#include <asm/system.h>
@@ -205,20 +206,11 @@
}

/*
- * Due to kernel stack usage, we use a _limited_ fd_set type here, and once
- * we really start supporting >256 file descriptors we'll probably have to
- * allocate the kernel fd_set copies dynamically.. (The kernel select routines
- * are careful to touch only the defined low bits of any fd_set pointer, this
- * is important for performance too).
- *
* Note a few subtleties: we use "long" for the dummy, not int, and we do a
* subtract by 1 on the nr of file descriptors. The former is better for
* machines with long > int, and the latter allows us to test the bit count
* against "zero or positive", which can mostly be just a sign bit test..
*/
-typedef struct {
- unsigned long dummy[NR_OPEN/(8*(sizeof(unsigned long)))];
-} limited_fd_set;

#define get_fd_set(nr,fsp,fdp) \
__get_fd_set(nr, (int *) (fsp), (int *) (fdp))
@@ -237,22 +229,47 @@
* Update: ERESTARTSYS breaks at least the xview clock binary, so
* I'm trying ERESTARTNOHAND which restart only when you want to.
*/
+
asmlinkage int sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
{
int error;
- limited_fd_set res_in, in;
- limited_fd_set res_out, out;
- limited_fd_set res_ex, ex;
+ char *seldata;
+ int selsize;
+ fd_set *res_in, *in;
+ fd_set *res_out, *out;
+ fd_set *res_ex, *ex;
unsigned long timeout;
+ long long s_a,s_b,s_c,s_d,s_e,s_f;

error = -EINVAL;
if (n < 0)
goto out;
if (n > NR_OPEN)
n = NR_OPEN;
- if ((error = get_fd_set(n, inp, &in)) ||
- (error = get_fd_set(n, outp, &out)) ||
- (error = get_fd_set(n, exp, &ex))) goto out;
+ selsize = (n+31)>>5;
+ if(selsize <= 2) {
+ seldata = NULL;
+ in = (fd_set *)&s_a;
+ out= (fd_set *)&s_b;
+ ex = (fd_set *)&s_c;
+ res_in = (fd_set *)&s_d;
+ res_out= (fd_set *)&s_e;
+ res_ex = (fd_set *)&s_f;
+ } else {
+ seldata = kmalloc(6*selsize,GFP_KERNEL);
+ error = -ENOMEM;
+ if(seldata == NULL)
+ goto out;
+ in = (fd_set *)seldata; seldata += selsize;
+ out = (fd_set *)seldata; seldata += selsize;
+ ex = (fd_set *)seldata; seldata += selsize;
+ res_in = (fd_set *)seldata; seldata += selsize;
+ res_out = (fd_set *)seldata; seldata += selsize;
+ res_ex = (fd_set *)seldata;
+ }
+ if ((error = get_fd_set(n, inp, in)) ||
+ (error = get_fd_set(n, outp, out)) ||
+ (error = get_fd_set(n, exp, ex))) goto out;
timeout = ~0UL;
if (tvp) {
error = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp));
@@ -268,12 +285,12 @@
zero_fd_set(n, &res_ex);
current->timeout = timeout;
error = do_select(n,
- (fd_set *) &in,
- (fd_set *) &out,
- (fd_set *) &ex,
- (fd_set *) &res_in,
- (fd_set *) &res_out,
- (fd_set *) &res_ex);
+ (fd_set *) in,
+ (fd_set *) out,
+ (fd_set *) ex,
+ (fd_set *) res_in,
+ (fd_set *) res_out,
+ (fd_set *) res_ex);
timeout = current->timeout - jiffies - 1;
current->timeout = 0;
if ((long) timeout < 0)
@@ -292,9 +309,11 @@
goto out;
error = 0;
}
- set_fd_set(n, inp, &res_in);
- set_fd_set(n, outp, &res_out);
- set_fd_set(n, exp, &res_ex);
+ set_fd_set(n, inp, res_in);
+ set_fd_set(n, outp, res_out);
+ set_fd_set(n, exp, res_ex);
out:
+ if(seldata != NULL)
+ kfree(seldata);
return error;
}

-- 
Eagles may soar, but weasels don't get sucked into jet engines.
                -- John Benfield
-- 
Matthias Urlichs         \  noris network GmbH  /  Xlink-POP Nürnberg 
Schleiermacherstraße 12   \   Linux+Internet   /   EMail: urlichs@noris.de
90491 Nürnberg (Germany)   \    Consulting+Programming+Networking+etc'ing
   PGP: 1024/4F578875   1B 89 E2 1C 43 EA 80 44  15 D2 29 CF C6 C7 E0 DE
       Click <A HREF="http://info.noris.de/~smurf/finger">here</A>.    42