Re: [PATCH v8 1/3] cgroups: read-write lock CLONE_THREAD forking per threadgroup

From: Paul Menage
Date: Thu Mar 03 2011 - 12:54:53 EST


On Mon, Feb 7, 2011 at 5:37 PM, Ben Blum <bblum@xxxxxxxxxxxxxx> wrote:
> Adds functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup
>
> From: Ben Blum <bblum@xxxxxxxxxxxxxx>
>
> This patch adds an rwsem that lives in a threadgroup's signal_struct that's
> taken for reading in the fork path, under CONFIG_CGROUPS. If another part of
> the kernel later wants to use such a locking mechanism, the CONFIG_CGROUPS
> ifdefs should be changed to a higher-up flag that CGROUPS and the other system
> would both depend on.
>
> This is a pre-patch for cgroup-procs-write.patch.
>
> Signed-off-by: Ben Blum <bblum@xxxxxxxxxxxxxx>

Reviewed-by: Paul Menage <menage@xxxxxxxxxx>

AFAICS, the only change from the previous version of this patch is the
addition of the linux/rwsem.h include in sched.h, so I think it's fair
to assume my previous Reviewed-by: tag still holds.
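
For anyone following along, here's a rough sketch of how the write side
of this lock is intended to be used. The function name and per-thread
work below are hypothetical; the real write-side user only shows up in
cgroup-procs-write.patch:

#include <linux/sched.h>
#include <linux/rcupdate.h>

/*
 * Illustration only: a hypothetical write-side user of the new rwsem.
 * The actual consumer is the cgroup attach path added later.
 */
static int do_something_threadgroup_wide(struct task_struct *leader)
{
	struct task_struct *t;

	/*
	 * Taking the rwsem for writing excludes CLONE_THREAD forks, so
	 * no new threads can join the group while we walk it.
	 */
	threadgroup_fork_write_lock(leader);

	rcu_read_lock();
	t = leader;
	do {
		/* per-thread work goes here */
	} while_each_thread(leader, t);
	rcu_read_unlock();

	threadgroup_fork_write_unlock(leader);
	return 0;
}

The point is just that holding the rwsem for writing pins the
threadgroup's membership for the duration of the walk, which is what a
fork-sensitive threadgroup-wide operation needs.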

(Incidentally, does anyone have any handy tools for tracking diffs
between things you've previously tagged as Acked-by or Reviewed-by, and
newer versions?)

Paul

> ---
>  include/linux/init_task.h |    9 +++++++++
>  include/linux/sched.h     |   37 +++++++++++++++++++++++++++++++++++++
>  kernel/fork.c             |   10 ++++++++++
>  3 files changed, 56 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/init_task.h b/include/linux/init_task.h
> index 6b281fa..b560381 100644
> --- a/include/linux/init_task.h
> +++ b/include/linux/init_task.h
> @@ -15,6 +15,14 @@
>  extern struct files_struct init_files;
>  extern struct fs_struct init_fs;
>
> +#ifdef CONFIG_CGROUPS
> +#define INIT_THREADGROUP_FORK_LOCK(sig)                                        \
> +       .threadgroup_fork_lock =                                        \
> +               __RWSEM_INITIALIZER(sig.threadgroup_fork_lock),
> +#else
> +#define INIT_THREADGROUP_FORK_LOCK(sig)
> +#endif
> +
>  #define INIT_SIGNALS(sig) {                                            \
>        .nr_threads     = 1,                                            \
>        .wait_chldexit  = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
> @@ -31,6 +39,7 @@ extern struct fs_struct init_fs;
>        },                                                              \
>        .cred_guard_mutex =                                             \
>                 __MUTEX_INITIALIZER(sig.cred_guard_mutex),             \
> +       INIT_THREADGROUP_FORK_LOCK(sig)                                 \
>  }
>
>  extern struct nsproxy init_nsproxy;
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 8580dc6..2fdbeb1 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -509,6 +509,8 @@ struct thread_group_cputimer {
>        spinlock_t lock;
>  };
>
> +#include <linux/rwsem.h>
> +
>  /*
>  * NOTE! "signal_struct" does not have it's own
>  * locking, because a shared signal_struct always
> @@ -623,6 +625,16 @@ struct signal_struct {
>        unsigned audit_tty;
>        struct tty_audit_buf *tty_audit_buf;
>  #endif
> +#ifdef CONFIG_CGROUPS
> +       /*
> +        * The threadgroup_fork_lock prevents threads from forking with
> +        * CLONE_THREAD while held for writing. Use this for fork-sensitive
> +        * threadgroup-wide operations. It's taken for reading in fork.c in
> +        * copy_process().
> +        * Currently only needed write-side by cgroups.
> +        */
> +       struct rw_semaphore threadgroup_fork_lock;
> +#endif
>
>        int oom_adj;            /* OOM kill score adjustment (bit shift) */
>        int oom_score_adj;      /* OOM kill score adjustment */
> @@ -2270,6 +2282,31 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
>        spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
>  }
>
> +/* See the declaration of threadgroup_fork_lock in signal_struct. */
> +#ifdef CONFIG_CGROUPS
> +static inline void threadgroup_fork_read_lock(struct task_struct *tsk)
> +{
> +       down_read(&tsk->signal->threadgroup_fork_lock);
> +}
> +static inline void threadgroup_fork_read_unlock(struct task_struct *tsk)
> +{
> +       up_read(&tsk->signal->threadgroup_fork_lock);
> +}
> +static inline void threadgroup_fork_write_lock(struct task_struct *tsk)
> +{
> +       down_write(&tsk->signal->threadgroup_fork_lock);
> +}
> +static inline void threadgroup_fork_write_unlock(struct task_struct *tsk)
> +{
> +       up_write(&tsk->signal->threadgroup_fork_lock);
> +}
> +#else
> +static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {}
> +static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {}
> +static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {}
> +static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {}
> +#endif
> +
>  #ifndef __HAVE_THREAD_FUNCTIONS
>
>  #define task_thread_info(task) ((struct thread_info *)(task)->stack)
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 0979527..aefe61f 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -905,6 +905,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
>
>        tty_audit_fork(sig);
>
> +#ifdef CONFIG_CGROUPS
> +       init_rwsem(&sig->threadgroup_fork_lock);
> +#endif
> +
>        sig->oom_adj = current->signal->oom_adj;
>        sig->oom_score_adj = current->signal->oom_score_adj;
>        sig->oom_score_adj_min = current->signal->oom_score_adj_min;
> @@ -1087,6 +1091,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>        monotonic_to_bootbased(&p->real_start_time);
>        p->io_context = NULL;
>        p->audit_context = NULL;
> +       if (clone_flags & CLONE_THREAD)
> +               threadgroup_fork_read_lock(current);
>        cgroup_fork(p);
>  #ifdef CONFIG_NUMA
>        p->mempolicy = mpol_dup(p->mempolicy);
> @@ -1294,6 +1300,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>        write_unlock_irq(&tasklist_lock);
>        proc_fork_connector(p);
>        cgroup_post_fork(p);
> +       if (clone_flags & CLONE_THREAD)
> +               threadgroup_fork_read_unlock(current);
>        perf_event_fork(p);
>        return p;
>
> @@ -1332,6 +1340,8 @@ bad_fork_cleanup_policy:
>        mpol_put(p->mempolicy);
>  bad_fork_cleanup_cgroup:
>  #endif
> +       if (clone_flags & CLONE_THREAD)
> +               threadgroup_fork_read_unlock(current);
>        cgroup_exit(p, cgroup_callbacks_done);
>        delayacct_tsk_free(p);
>        module_put(task_thread_info(p)->exec_domain->module);
>