[RFC PATCH 2/2] userns/nproc: Add hooks for userns nproc management

From: Nikolay Borisov
Date: Tue Sep 08 2015 - 04:11:42 EST


From: Nikolay Borisov <n.borisov@xxxxxxxxxxxxxx>

This patch introduces calls to the userns_nproc_* functions
where necessary so that processes are accounted for correctly.

Signed-off-by: Nikolay Borisov <kernel@xxxxxxxx>
---
kernel/cred.c | 36 ++++++++++++++++++++++++++++++++++--
kernel/exit.c | 9 +++++++++
kernel/fork.c | 33 +++++++++++++++++++++++++++------
3 files changed, 70 insertions(+), 8 deletions(-)

diff --git a/kernel/cred.c b/kernel/cred.c
index b7581dc..79565b8 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -320,6 +320,7 @@ struct cred *prepare_exec_creds(void)
int copy_creds(struct task_struct *p, unsigned long clone_flags)
{
struct cred *new;
+ struct user_namespace *ns;
int ret;

if (
@@ -331,10 +332,15 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
p->real_cred = get_cred(p->cred);
get_cred(p->cred);
alter_cred_subscribers(p->cred, 2);
+ ns = p->real_cred->user_ns;
kdebug("share_creds(%p{%d,%d})",
p->cred, atomic_read(&p->cred->usage),
read_cred_subscribers(p->cred));
atomic_inc(&p->cred->user->processes);
+ if (ns != &init_user_ns) {
+ pr_info ("%s: incrementing nproc from due copy_process (CLONE_THREAD)\n", __func__);
+ userns_nproc_inc(ns, from_kuid_munged(ns, p->real_cred->uid));
+ }
return 0;
}

@@ -343,6 +349,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
return -ENOMEM;

if (clone_flags & CLONE_NEWUSER) {
+ pr_debug("%s: Creating new usernamespace\n", __func__);
ret = create_user_ns(new);
if (ret < 0)
goto error_put;
@@ -369,6 +376,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)

atomic_inc(&new->user->processes);
p->cred = p->real_cred = get_cred(new);
+ ns = p->real_cred->user_ns;
+ if (ns != &init_user_ns) {
+ pr_info("%s: Incrementing due to not-being a thread\n", __func__);
+ userns_nproc_inc(ns, from_kuid_munged(ns, p->real_cred->uid));
+ }
alter_cred_subscribers(new, 2);
validate_creds(new);
return 0;
@@ -454,17 +466,37 @@ int commit_creds(struct cred *new)
if (!gid_eq(new->fsgid, old->fsgid))
key_fsgid_changed(task);

+ /* Handle cases when a process is moving from one userns to another */
+ if (old->user_ns != new->user_ns) {
+ if (new->user_ns != &init_user_ns) {
+ pr_info ("\t%s: incrementing user count in %p\n", __func__, new->user_ns);
+ userns_nproc_inc(new->user_ns, from_kuid_munged(new->user_ns, new->uid));
+ }
+ if (old->user_ns != &init_user_ns) {
+ pr_info ("\t%s: decrementing user_count in %p\n", __func__, old->user_ns);
+ userns_nproc_dec(old->user_ns, from_kuid_munged(old->user_ns, old->uid));
+ }
+ }
+
/* do it
* RLIMIT_NPROC limits on user->processes have already been checked
* in set_user().
*/
alter_cred_subscribers(new, 2);
- if (new->user != old->user)
+ if (new->user != old->user) {
atomic_inc(&new->user->processes);
+ if (new->user_ns != &init_user_ns)
+ userns_nproc_inc(new->user_ns,
+ from_kuid_munged(new->user_ns, new->uid));
+ }
rcu_assign_pointer(task->real_cred, new);
rcu_assign_pointer(task->cred, new);
- if (new->user != old->user)
+ if (new->user != old->user) {
atomic_dec(&old->user->processes);
+ if (old->user_ns != &init_user_ns)
+ userns_nproc_dec(old->user_ns,
+ from_kuid_munged(old->user_ns, old->uid));
+ }
alter_cred_subscribers(old, -2);

/* send notifications */
diff --git a/kernel/exit.c b/kernel/exit.c
index 22fcc05..dde172b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -170,13 +170,22 @@ void release_task(struct task_struct *p)
{
struct task_struct *leader;
int zap_leader;
+ struct user_namespace *ns;
+ kuid_t uid;
repeat:
/* don't need to get the RCU readlock here - the process is dead and
* can't be modifying its own credentials. But shut RCU-lockdep up */
rcu_read_lock();
atomic_dec(&__task_cred(p)->user->processes);
+ ns = get_user_ns(__task_cred(p)->user_ns);
+ uid = __task_cred(p)->uid;
rcu_read_unlock();

+ if (ns != &init_user_ns)
+ userns_nproc_dec(ns, from_kuid_munged(ns, uid));
+
+ put_user_ns(ns);
+
proc_flush_task(p);

write_lock_irq(&tasklist_lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index f9826a3..c537b6a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1308,18 +1308,34 @@ static struct task_struct *copy_process(unsigned long clone_flags,
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
retval = -EAGAIN;
- if (atomic_read(&p->real_cred->user->processes) >=
- task_rlimit(p, RLIMIT_NPROC)) {
- if (p->real_cred->user != INIT_USER &&
- !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
- goto bad_fork_free;
- }
+ //If we are in the root namespace use this check
+ if (p->real_cred->user_ns == &init_user_ns) {
+ if (atomic_read(&p->real_cred->user->processes) >=
+ task_rlimit(p, RLIMIT_NPROC)) {
+ if (p->real_cred->user != INIT_USER &&
+ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
+ goto bad_fork_free;
+ }
current->flags &= ~PF_NPROC_EXCEEDED;
+ }

retval = copy_creds(p, clone_flags);
if (retval < 0)
goto bad_fork_free;

+ //Otherwise perform the non-root userns check here
+ //since we want the stuff in copy_cred to have already happened
+ if (p->real_cred->user_ns != &init_user_ns) {
+ struct user_namespace *ns = p->real_cred->user_ns;
+ int32_t processes = get_userns_nproc(ns, from_kuid_munged(ns, p->real_cred->uid));
+ retval = -EAGAIN;
+
+ if (processes >= task_rlimit(p, RLIMIT_NPROC))
+ goto bad_fork_cleanup_userns_count;
+ else
+ current->flags &= ~PF_NPROC_EXCEEDED;
+ }
+
/*
* If multiple threads are within copy_process(), then this check
* triggers too late. This doesn't hurt, the check is only there
@@ -1652,6 +1668,9 @@ bad_fork_cleanup_threadgroup_lock:
delayacct_tsk_free(p);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
+bad_fork_cleanup_userns_count:
+ if (p->cred->user_ns != &init_user_ns)
+ userns_nproc_dec(p->cred->user_ns, from_kuid_munged(p->cred->user_ns, p->cred->uid));
exit_creds(p);
bad_fork_free:
free_task(p);
@@ -1936,6 +1955,7 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
int do_sysvsem = 0;
int err;

+ pr_info("%s begin\n", __func__);
/*
* If unsharing a user namespace must also unshare the thread.
*/
@@ -2037,6 +2057,7 @@ bad_unshare_cleanup_fs:
free_fs_struct(new_fs);

bad_unshare_out:
+ pr_info("%s end\n", __func__);
return err;
}

--
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/