[BK PATCH 1/1] Remove NGROUPS hard limit (re-re-re-re-send)

From: Timothy Hockin (th122948@scl2.sfbay.sun.com)
Date: Wed Dec 04 2002 - 20:50:14 EST


# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.905 -> 1.906
# include/linux/kernel.h 1.27 -> 1.28
# lib/Makefile 1.15 -> 1.16
# include/linux/init_task.h 1.19 -> 1.20
# include/linux/sched.h 1.114 -> 1.115
# kernel/fork.c 1.92 -> 1.93
# kernel/sys.c 1.36 -> 1.37
# include/asm-i386/param.h 1.2 -> 1.3
# include/linux/sunrpc/svcauth.h 1.4 -> 1.5
# kernel/uid16.c 1.4 -> 1.5
# fs/proc/array.c 1.35 -> 1.36
# net/sunrpc/svcauth_unix.c 1.9 -> 1.10
# kernel/exit.c 1.76 -> 1.77
# include/linux/limits.h 1.3 -> 1.4
# fs/nfsd/auth.c 1.1 -> 1.2
# (new) -> 1.1 lib/bsearch.c
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/12/04 thockin@freakshow.cobalt.com 1.906
# Remove the limit of 32 groups. We now have a per-task, dynamic array of
# groups, which is kept sorted and refcounted. If the task has 32 or fewer
# groups, we behave like older kernels and use an inline array.
#
# This ChangeSet incorporates all the core functionality, but does not fix up
# all the incorrect architecture usages of groups.
# --------------------------------------------
#
diff -Nru a/fs/nfsd/auth.c b/fs/nfsd/auth.c
--- a/fs/nfsd/auth.c Wed Dec 4 17:30:43 2002
+++ b/fs/nfsd/auth.c Wed Dec 4 17:30:43 2002
@@ -10,12 +10,15 @@
 #include <linux/sunrpc/svcauth.h>
 #include <linux/nfsd/nfsd.h>
 
+extern asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist);
+
 #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE))
 void
 nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
 {
         struct svc_cred *cred = &rqstp->rq_cred;
         int i;
+ gid_t groups[SVC_CRED_NGROUPS];
 
         if (rqstp->rq_userset)
                 return;
@@ -29,7 +32,7 @@
                         cred->cr_uid = exp->ex_anon_uid;
                 if (!cred->cr_gid)
                         cred->cr_gid = exp->ex_anon_gid;
- for (i = 0; i < NGROUPS; i++)
+ for (i = 0; i < SVC_CRED_NGROUPS; i++)
                         if (!cred->cr_groups[i])
                                 cred->cr_groups[i] = exp->ex_anon_gid;
         }
@@ -42,13 +45,13 @@
                 current->fsgid = cred->cr_gid;
         else
                 current->fsgid = exp->ex_anon_gid;
- for (i = 0; i < NGROUPS; i++) {
+ for (i = 0; i < SVC_CRED_NGROUPS; i++) {
                 gid_t group = cred->cr_groups[i];
                 if (group == (gid_t) NOGROUP)
                         break;
- current->groups[i] = group;
+ groups[i] = group;
         }
- current->ngroups = i;
+ sys_setgroups(i, groups);
 
         if ((cred->cr_uid)) {
                 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
diff -Nru a/fs/proc/array.c b/fs/proc/array.c
--- a/fs/proc/array.c Wed Dec 4 17:30:43 2002
+++ b/fs/proc/array.c Wed Dec 4 17:30:43 2002
@@ -172,7 +172,7 @@
                 p->files ? p->files->max_fds : 0);
         task_unlock(p);
 
- for (g = 0; g < p->ngroups; g++)
+ for (g = 0; g < min(p->ngroups, OLD_NGROUPS); g++)
                 buffer += sprintf(buffer, "%d ", p->groups[g]);
 
         buffer += sprintf(buffer, "\n");
diff -Nru a/include/asm-i386/param.h b/include/asm-i386/param.h
--- a/include/asm-i386/param.h Wed Dec 4 17:30:43 2002
+++ b/include/asm-i386/param.h Wed Dec 4 17:30:43 2002
@@ -13,10 +13,6 @@
 
 #define EXEC_PAGESIZE 4096
 
-#ifndef NGROUPS
-#define NGROUPS 32
-#endif
-
 #ifndef NOGROUP
 #define NOGROUP (-1)
 #endif
diff -Nru a/include/linux/init_task.h b/include/linux/init_task.h
--- a/include/linux/init_task.h Wed Dec 4 17:30:43 2002
+++ b/include/linux/init_task.h Wed Dec 4 17:30:43 2002
@@ -80,6 +80,7 @@
         .real_timer = { \
                 .function = it_real_fn \
         }, \
+ .ngroups = 0, \
         .cap_effective = CAP_INIT_EFF_SET, \
         .cap_inheritable = CAP_INIT_INH_SET, \
         .cap_permitted = CAP_FULL_SET, \
diff -Nru a/include/linux/kernel.h b/include/linux/kernel.h
--- a/include/linux/kernel.h Wed Dec 4 17:30:43 2002
+++ b/include/linux/kernel.h Wed Dec 4 17:30:43 2002
@@ -216,4 +216,7 @@
 #define __FUNCTION__ (__func__)
 #endif
 
+void *bsearch(const void *key, const void *base, size_t nmemb, size_t size,
+ int (*compar)(const void *, const void *));
+
 #endif
diff -Nru a/include/linux/limits.h b/include/linux/limits.h
--- a/include/linux/limits.h Wed Dec 4 17:30:43 2002
+++ b/include/linux/limits.h Wed Dec 4 17:30:43 2002
@@ -3,7 +3,6 @@
 
 #define NR_OPEN 1024
 
-#define NGROUPS_MAX 32 /* supplemental group IDs are available */
 #define ARG_MAX 131072 /* # bytes of args + environ for exec() */
 #define CHILD_MAX 999 /* no limit :-) */
 #define OPEN_MAX 256 /* # open files a process may have */
@@ -18,5 +17,7 @@
 #define XATTR_LIST_MAX 65536 /* size of extended attribute namelist (64k) */
 
 #define RTSIG_MAX 32
+
+#define OLD_NGROUPS 32 /* old limit of supplemental group IDs */
 
 #endif
diff -Nru a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h Wed Dec 4 17:30:43 2002
+++ b/include/linux/sched.h Wed Dec 4 17:30:43 2002
@@ -276,6 +276,8 @@
 typedef struct prio_array prio_array_t;
 struct backing_dev_info;
 
+#define NGROUPS_INLINE 32
+
 struct task_struct {
         volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
         struct thread_info *thread_info;
@@ -348,7 +350,9 @@
         uid_t uid,euid,suid,fsuid;
         gid_t gid,egid,sgid,fsgid;
         int ngroups;
- gid_t groups[NGROUPS];
+ gid_t *groups;
+ gid_t groups_inline[NGROUPS_INLINE];
+ atomic_t *groups_refcount;
         kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
         int keep_capabilities:1;
         struct user_struct *user;
diff -Nru a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
--- a/include/linux/sunrpc/svcauth.h Wed Dec 4 17:30:43 2002
+++ b/include/linux/sunrpc/svcauth.h Wed Dec 4 17:30:43 2002
@@ -14,10 +14,11 @@
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/cache.h>
 
+#define SVC_CRED_NGROUPS 32
 struct svc_cred {
         uid_t cr_uid;
         gid_t cr_gid;
- gid_t cr_groups[NGROUPS];
+ gid_t cr_groups[SVC_CRED_NGROUPS];
 };
 
 struct svc_rqst; /* forward decl */
diff -Nru a/kernel/exit.c b/kernel/exit.c
--- a/kernel/exit.c Wed Dec 4 17:30:43 2002
+++ b/kernel/exit.c Wed Dec 4 17:30:43 2002
@@ -57,6 +57,7 @@
         return proc_dentry;
 }
 
+extern void groups_free(gid_t *groups, int gidsetsize);
 void release_task(struct task_struct * p)
 {
         struct dentry *proc_dentry;
@@ -66,6 +67,12 @@
  
         if (p != current)
                 wait_task_inactive(p);
+
+ if (p->ngroups > NGROUPS_INLINE
+ && atomic_dec_and_test(p->groups_refcount)) {
+ kfree(p->groups_refcount);
+ groups_free(p->groups, p->ngroups);
+ }
 
         atomic_dec(&p->user->processes);
         security_task_free(p);
diff -Nru a/kernel/fork.c b/kernel/fork.c
--- a/kernel/fork.c Wed Dec 4 17:30:43 2002
+++ b/kernel/fork.c Wed Dec 4 17:30:43 2002
@@ -832,6 +832,13 @@
          */
         clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 
+ /* increment the groups ref count */
+ if (p->ngroups > NGROUPS_INLINE) {
+ atomic_inc(p->groups_refcount);
+ } else if (p->ngroups) {
+ p->groups = p->groups_inline;
+ }
+
         /* Our parent execution domain becomes current domain
            These must match for thread signalling to apply */
            
diff -Nru a/kernel/sys.c b/kernel/sys.c
--- a/kernel/sys.c Wed Dec 4 17:30:43 2002
+++ b/kernel/sys.c Wed Dec 4 17:30:43 2002
@@ -21,6 +21,8 @@
 #include <linux/times.h>
 #include <linux/security.h>
 #include <linux/dcookies.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1062,42 +1064,137 @@
         return i;
 }
 
+/* a simple shell-metzner sort */
+static void groupsort(gid_t *grouplist, int gidsetsize)
+{
+ int base, max, stride;
+
+ for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
+ ; /* nothing */
+ stride /= 3;
+
+ while (stride) {
+ max = gidsetsize - stride;
+ for (base = 0; base < max; base++) {
+ int left = base;
+ gid_t tmp = grouplist[base + stride];
+ while (left >= 0 && tmp < grouplist[left]) {
+ grouplist[left] = grouplist[left + stride];
+ left -= stride;
+ }
+ grouplist[left + stride] = tmp;
+ }
+ stride /= 3;
+ }
+}
+
+static int gid_t_cmp(const void *a, const void *b)
+{
+ return *((gid_t *)a) - *((gid_t *)b);
+}
+
+#define GROUPS_KV_THRESH (2*EXEC_PAGESIZE/sizeof(gid_t))
+gid_t *groups_alloc(int gidsetsize)
+{
+ if (gidsetsize <= GROUPS_KV_THRESH)
+ return kmalloc(gidsetsize * sizeof(gid_t), GFP_KERNEL);
+ else
+ return vmalloc(gidsetsize * sizeof(gid_t));
+}
+
+void groups_free(gid_t *groups, int gidsetsize)
+{
+ if (gidsetsize <= NGROUPS_INLINE)
+ ; /* nothing */
+ else if (gidsetsize <= GROUPS_KV_THRESH)
+ kfree(groups);
+ else
+ vfree(groups);
+}
+
 /*
- * SMP: Our groups are not shared. We can copy to/from them safely
+ * SMP: Our groups are copy-on-write. We can set them safely
  * without another task interfering.
  */
-
-asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
+int do_setgroups(int gidsetsize, gid_t *grouplist)
 {
- gid_t groups[NGROUPS];
+ atomic_t *newrefcnt = NULL;
         int retval;
 
- if (!capable(CAP_SETGID))
- return -EPERM;
- if ((unsigned) gidsetsize > NGROUPS)
- return -EINVAL;
- if(copy_from_user(groups, grouplist, gidsetsize * sizeof(gid_t)))
- return -EFAULT;
- retval = security_task_setgroups(gidsetsize, groups);
- if (retval)
+ BUG_ON(gidsetsize && !grouplist);
+
+ retval = security_task_setgroups(gidsetsize, grouplist);
+ if (retval) {
+ groups_free(grouplist, gidsetsize);
                 return retval;
- memcpy(current->groups, groups, gidsetsize * sizeof(gid_t));
+ }
+
+ if (gidsetsize > NGROUPS_INLINE) {
+ newrefcnt = kmalloc(sizeof(*newrefcnt), GFP_KERNEL);
+ if (!newrefcnt) {
+ groups_free(grouplist, gidsetsize);
+ return -ENOMEM;
+ }
+ atomic_set(newrefcnt, 1);
+ }
+ if (gidsetsize) {
+ /* sort the grouplist for faster searches */
+ groupsort(grouplist, gidsetsize);
+ }
+
+ /* disassociate ourselves from any shared group list */
+ if (current->ngroups > NGROUPS_INLINE
+ && atomic_dec_and_test(current->groups_refcount)) {
+ kfree(current->groups_refcount);
+ groups_free(current->groups, current->ngroups);
+ }
+
+ /* use the inline array for small numbers of groups */
+ if (gidsetsize <= NGROUPS_INLINE) {
+ memcpy(current->groups_inline, grouplist,
+ gidsetsize * sizeof(gid_t));
+ grouplist = current->groups_inline;
+ }
+
+ current->groups = grouplist;
+ current->groups_refcount = newrefcnt;
         current->ngroups = gidsetsize;
+
         return 0;
 }
+
+asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
+{
+ gid_t *groups = NULL;
+ gid_t groups_ar[NGROUPS_INLINE];
+
+ if (!capable(CAP_SETGID))
+ return -EPERM;
+ if (gidsetsize) {
+ if (gidsetsize <= NGROUPS_INLINE) {
+ groups = groups_ar;
+ } else {
+ groups = groups_alloc(gidsetsize);
+ if (!groups)
+ return -ENOMEM;
+ }
+
+ if (copy_from_user(groups, grouplist,
+ gidsetsize * sizeof(gid_t))) {
+ groups_free(groups, gidsetsize);
+ return -EFAULT;
+ }
+ }
+
+ return do_setgroups(gidsetsize, groups);
+}
 
 static int supplemental_group_member(gid_t grp)
 {
- int i = current->ngroups;
-
- if (i) {
- gid_t *groups = current->groups;
- do {
- if (*groups == grp)
- return 1;
- groups++;
- i--;
- } while (i);
+ if (current->ngroups) {
+ if (bsearch(&grp, current->groups, current->ngroups,
+ sizeof(gid_t), gid_t_cmp))
+ return 1;
         }
         return 0;
 }
@@ -1390,3 +1487,4 @@
 EXPORT_SYMBOL(unregister_reboot_notifier);
 EXPORT_SYMBOL(in_group_p);
 EXPORT_SYMBOL(in_egroup_p);
+EXPORT_SYMBOL(sys_setgroups);
diff -Nru a/kernel/uid16.c b/kernel/uid16.c
--- a/kernel/uid16.c Wed Dec 4 17:30:43 2002
+++ b/kernel/uid16.c Wed Dec 4 17:30:43 2002
@@ -13,6 +13,8 @@
 #include <linux/init.h>
 #include <linux/highuid.h>
 #include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include <asm/uaccess.h>
 
@@ -107,45 +109,73 @@
         return sys_setfsgid((gid_t)gid);
 }
 
+extern gid_t *groups_alloc(int gidsetsize);
+extern void groups_free(gid_t *groups, int gidsetsize);
+extern int do_setgroups(int gidsetsize, gid_t *grouplist);
+
 asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t *grouplist)
 {
- old_gid_t groups[NGROUPS];
+ old_gid_t *groups;
         int i,j;
 
         if (gidsetsize < 0)
                 return -EINVAL;
         i = current->ngroups;
- if (gidsetsize) {
+ if (i && gidsetsize) {
                 if (i > gidsetsize)
                         return -EINVAL;
+ groups = vmalloc(i * sizeof(old_gid_t));
+ if (!groups)
+ return -ENOMEM;
                 for(j=0;j<i;j++)
                         groups[j] = current->groups[j];
- if (copy_to_user(grouplist, groups, sizeof(old_gid_t)*i))
+ if (copy_to_user(grouplist, groups, sizeof(old_gid_t)*i)) {
+ vfree(groups);
                         return -EFAULT;
+ }
+ vfree(groups);
         }
         return i;
 }
 
 asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t *grouplist)
 {
- old_gid_t groups[NGROUPS];
- gid_t new_groups[NGROUPS];
+ old_gid_t *groups;
+ gid_t *new_groups = NULL;
+ gid_t new_groups_ar[NGROUPS_INLINE];
         int i;
 
         if (!capable(CAP_SETGID))
                 return -EPERM;
- if ((unsigned) gidsetsize > NGROUPS)
- return -EINVAL;
- if (copy_from_user(groups, grouplist, gidsetsize * sizeof(old_gid_t)))
- return -EFAULT;
- for (i = 0 ; i < gidsetsize ; i++)
- new_groups[i] = (gid_t)groups[i];
- i = security_task_setgroups(gidsetsize, new_groups);
- if (i)
- return i;
- memcpy(current->groups, new_groups, gidsetsize * sizeof(gid_t));
- current->ngroups = gidsetsize;
- return 0;
+ if (gidsetsize) {
+ groups = vmalloc(gidsetsize * sizeof(old_gid_t));
+ if (!groups)
+ return -ENOMEM;
+
+ if (copy_from_user(groups, grouplist,
+ gidsetsize * sizeof(old_gid_t))) {
+ vfree(groups);
+ return -EFAULT;
+ }
+
+ if (gidsetsize <= NGROUPS_INLINE) {
+ new_groups = new_groups_ar;
+ } else {
+ new_groups = groups_alloc(gidsetsize);
+ if (!new_groups) {
+ vfree(groups);
+ return -ENOMEM;
+ }
+ }
+
+ for (i = 0; i < gidsetsize; i++)
+ new_groups[i] = (gid_t)groups[i];
+
+ vfree(groups);
+ }
+
+ /* this handles the allocated new_groups */
+ return do_setgroups(gidsetsize, new_groups);
 }
 
 asmlinkage long sys_getuid16(void)
diff -Nru a/lib/Makefile b/lib/Makefile
--- a/lib/Makefile Wed Dec 4 17:30:43 2002
+++ b/lib/Makefile Wed Dec 4 17:30:43 2002
@@ -9,11 +9,11 @@
 L_TARGET := lib.a
 
 export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o \
- crc32.o rbtree.o radix-tree.o kobject.o
+ crc32.o rbtree.o radix-tree.o kobject.o bsearch.o
 
 obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \
          bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
- kobject.o
+ kobject.o bsearch.o
 
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff -Nru a/lib/bsearch.c b/lib/bsearch.c
--- /dev/null Wed Dec 31 16:00:00 1969
+++ b/lib/bsearch.c Wed Dec 4 17:30:43 2002
@@ -0,0 +1,49 @@
+/* Copyright (C) 1991, 1992, 1997, 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/* Perform a binary search for KEY in BASE which has NMEMB elements
+ of SIZE bytes each. The comparisons are done by (*COMPAR)(). */
+void *
+bsearch(const void *key, const void *base, size_t nmemb, size_t size,
+ int (*compar)(const void *, const void *))
+{
+ size_t l, u, idx;
+ const void *p;
+ int comparison;
+
+ l = 0;
+ u = nmemb;
+ while (l < u) {
+ idx = (l + u) / 2;
+ p = (void *)(((const char *)base) + (idx * size));
+ comparison = (*compar)(key, p);
+ if (comparison < 0)
+ u = idx;
+ else if (comparison > 0)
+ l = idx + 1;
+ else
+ return (void *)p;
+ }
+
+ return NULL;
+}
+
+EXPORT_SYMBOL(bsearch);
diff -Nru a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
--- a/net/sunrpc/svcauth_unix.c Wed Dec 4 17:30:43 2002
+++ b/net/sunrpc/svcauth_unix.c Wed Dec 4 17:30:43 2002
@@ -401,11 +401,11 @@
         if (slen > 16 || (len -= (slen + 2)*4) < 0)
                 goto badcred;
         for (i = 0; i < slen; i++)
- if (i < NGROUPS)
+ if (i < SVC_CRED_NGROUPS)
                         cred->cr_groups[i] = ntohl(svc_getu32(argv));
                 else
                         svc_getu32(argv);
- if (i < NGROUPS)
+ if (i < SVC_CRED_NGROUPS)
                 cred->cr_groups[i] = NOGROUP;
 
         if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Dec 07 2002 - 22:00:21 EST