Hi guys,
Here is the quick and dirty version of my "chunk allocator" for the
poll(2) system call. Before I send it to Alan I need to think (or better,
sleep) on it a bit, but I thought it was worth showing now so you can
help me catch anything I may have missed, i.e. please review :)
http://www.ocston.org/~tigran/patches/pollfix.patch
Thanks,
tigran
--- linux/fs/select.c Tue Aug 31 19:30:48 1999
+++ linux-poll/fs/select.c Tue Jan 18 20:41:00 2000
@@ -8,6 +8,10 @@
* COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
* flag set in its personality we do *not* modify the given timeout
* parameter to reflect time remaining.
+ *
+ * 18 January 2000
+ * Changed sys_poll()/do_poll() to use chunk-based allocation of fds to
+ * overcome nfds < 16390 descriptors limit (Tigran Aivazian).
*/
#include <linux/malloc.h>
@@ -328,39 +332,52 @@
return ret;
}
-static int do_poll(unsigned int nfds, struct pollfd *fds, poll_table *wait,
+#define POLL_PER_CHUNK 10000
+#define POLL_NCHUNKS 16
+
+static void do_pollfd(struct pollfd * fdp, poll_table * wait, int *count)
+{
+ int fd;
+ unsigned int mask;
+
+ mask = 0;
+ fd = fdp->fd;
+ if (fd >= 0) {
+ struct file * file = fget(fd);
+ mask = POLLNVAL;
+ if (file != NULL) {
+ mask = DEFAULT_POLLMASK;
+ if (file->f_op && file->f_op->poll)
+ mask = file->f_op->poll(file, wait);
+ mask &= fdp->events | POLLERR | POLLHUP;
+ fput(file);
+ }
+ if (mask) {
+ wait = NULL;
+ (*count)++;
+ }
+ }
+ fdp->revents = mask;
+}
+
+static int do_poll(unsigned int nfds, struct pollfd *fds[], poll_table *wait,
long timeout)
{
int count = 0;
+ int nchunks = nfds / POLL_PER_CHUNK;
+ int nleft = nfds % POLL_PER_CHUNK;
for (;;) {
- unsigned int j;
+ unsigned int i, j;
struct pollfd * fdpnt;
set_current_state(TASK_INTERRUPTIBLE);
- for (fdpnt = fds, j = 0; j < nfds; j++, fdpnt++) {
- int fd;
- unsigned int mask;
-
- mask = 0;
- fd = fdpnt->fd;
- if (fd >= 0) {
- struct file * file = fget(fd);
- mask = POLLNVAL;
- if (file != NULL) {
- mask = DEFAULT_POLLMASK;
- if (file->f_op && file->f_op->poll)
- mask = file->f_op->poll(file, wait);
- mask &= fdpnt->events | POLLERR | POLLHUP;
- fput(file);
- }
- if (mask) {
- wait = NULL;
- count++;
- }
- }
- fdpnt->revents = mask;
- }
+ for (i=0; i < nchunks; i++)
+ for (fdpnt = fds[i], j = 0; j < POLL_PER_CHUNK; j++, fdpnt++)
+ do_pollfd(fdpnt, wait, &count);
+ if (nleft)
+ for (fdpnt = fds[nchunks], j = 0; j < nleft; j++, fdpnt++)
+ do_pollfd(fdpnt, wait, &count);
wait = NULL;
if (count || !timeout || signal_pending(current))
@@ -373,18 +390,17 @@
asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
{
- int i, fdcount, err, size;
- struct pollfd * fds, *fds1;
+ int i, j, fdcount, err;
+ struct pollfd * fds[POLL_NCHUNKS];
poll_table *wait_table = NULL, *wait = NULL;
+ int nchunks, nleft;
- lock_kernel();
/* Do a sanity check on nfds ... */
- err = -EINVAL;
- if (nfds > current->files->max_fds)
- goto out;
+ if (nfds > current->files->max_fds || nfds > POLL_PER_CHUNK*POLL_NCHUNKS)
+ return -EINVAL;
if (timeout) {
- /* Carefula about overflow in the intermediate values */
+ /* Careful about overflow in the intermediate values */
if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
timeout = (unsigned long)(timeout*HZ+999)/1000+1;
else /* Negative or overflow */
@@ -392,6 +408,7 @@
}
err = -ENOMEM;
+ lock_kernel();
if (timeout) {
wait_table = (poll_table *) __get_free_page(GFP_KERNEL);
if (!wait_table)
@@ -402,29 +419,57 @@
wait = wait_table;
}
- size = nfds * sizeof(struct pollfd);
- fds = (struct pollfd *) kmalloc(size, GFP_KERNEL);
- if (!fds)
- goto out;
+ nchunks = 0;
+ nleft = nfds;
+ while (nleft > POLL_PER_CHUNK) { /* allocate complete chunks */
+ fds[nchunks] = kmalloc(POLL_PER_CHUNK * sizeof(struct pollfd), GFP_KERNEL);
+ if (fds[nchunks] == NULL)
+ goto out_fds;
+ nchunks++;
+ nleft -= POLL_PER_CHUNK;
+ }
+ if (nleft) { /* allocate last chunk, incomplete = nleft elements */
+ fds[nchunks] = kmalloc(nleft * sizeof(struct pollfd), GFP_KERNEL);
+ if (fds[nchunks] == NULL)
+ goto out_fds;
+ }
err = -EFAULT;
- if (copy_from_user(fds, ufds, size))
- goto out_fds;
+ for (i=0; i < nchunks; i++) {
+ if (copy_from_user(fds[i], ufds + i*POLL_PER_CHUNK,
+ POLL_PER_CHUNK * sizeof(struct pollfd)))
+ goto out_fds1;
+ }
+ if (nleft) {
+ if (copy_from_user(fds[i], ufds + nchunks*POLL_PER_CHUNK,
+ nleft * sizeof(struct pollfd)))
+ goto out_fds1;
+ }
fdcount = do_poll(nfds, fds, wait, timeout);
/* OK, now copy the revents fields back to user space. */
- fds1 = fds;
- for(i=0; i < (int)nfds; i++, ufds++, fds1++) {
- __put_user(fds1->revents, &ufds->revents);
+ for(i=0; i < nchunks; i++) {
+ struct pollfd *u;
+ u = ufds + i*POLL_PER_CHUNK;
+ for (j=0; j < POLL_PER_CHUNK; j++) {
+ struct pollfd *p;
+
+ p = fds[i] + j;
+ __put_user((fds[i] + j)->revents, &u->revents);
+ }
}
err = fdcount;
if (!fdcount && signal_pending(current))
err = -EINTR;
+out_fds1:
+ if (nleft)
+ kfree(fds[nchunks]);
out_fds:
- kfree(fds);
+ for (i=0; i < nchunks; i++)
+ kfree(fds[i]);
out:
if (wait)
free_wait(wait_table);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/
This archive was generated by hypermail 2b29 : Sun Jan 23 2000 - 21:00:18 EST