Re: [PATCH][BUGFIX] cgroups: fix pid namespace bug

From: Paul Menage
Date: Thu Jul 02 2009 - 12:26:55 EST


On Wed, Jul 1, 2009 at 6:24 PM, Li Zefan<lizf@xxxxxxxxxxxxxx> wrote:
> The bug was introduced by commit cc31edceee04a7b87f2be48f9489ebb72d264844
> ("cgroups: convert tasks file to use a seq_file with shared pid array").
>
> We cache a pid array for all threads that are opening the same "tasks"
> file, but the pids in the array are always from the namespace of the
> last process that opened the file, so all other threads will read pids
> from that namespace instead of their own namespaces.
>
> To fix it, we maintain a list of pid arrays, which is keyed by pid_ns.
> The list will be of length 1 at most time.
>
> Reported-by: Paul Menage <menage@xxxxxxxxxx>
> Idea-by: Paul Menage <menage@xxxxxxxxxx>
> Signed-off-by: Li Zefan <lizf@xxxxxxxxxxxxxx>
> ---
>  include/linux/cgroup.h |   11 ++----
>  kernel/cgroup.c        |   94 +++++++++++++++++++++++++++++++++++------------
>  2 files changed, 74 insertions(+), 31 deletions(-)
>
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 665fa70..20411d2 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -179,14 +179,11 @@ struct cgroup {
>         */
>        struct list_head release_list;
>
> -       /* pids_mutex protects the fields below */
> +       /* pids_mutex protects pids_list and cached pid arrays. */
>        struct rw_semaphore pids_mutex;
> -       /* Array of process ids in the cgroup */
> -       pid_t *tasks_pids;
> -       /* How many files are using the current tasks_pids array */
> -       int pids_use_count;
> -       /* Length of the current tasks_pids array */
> -       int pids_length;
> +
> +       /* Linked list of struct cgroup_pids */
> +       struct list_head pids_list;
>
>        /* For RCU-protected deletion */
>        struct rcu_head rcu_head;
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 3737a68..13dddb4 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -47,6 +47,7 @@
>  #include <linux/hash.h>
>  #include <linux/namei.h>
>  #include <linux/smp_lock.h>
> +#include <linux/pid_namespace.h>
>
>  #include <asm/atomic.h>
>
> @@ -960,6 +961,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
>        INIT_LIST_HEAD(&cgrp->children);
>        INIT_LIST_HEAD(&cgrp->css_sets);
>        INIT_LIST_HEAD(&cgrp->release_list);
> +       INIT_LIST_HEAD(&cgrp->pids_list);
>        init_rwsem(&cgrp->pids_mutex);
>  }
>  static void init_cgroup_root(struct cgroupfs_root *root)
> @@ -2201,12 +2203,30 @@ err:
>        return ret;
>  }
>
> +/*
> + * Cache pids for all threads in the same pid namespace that are
> + * opening the same "tasks" file.
> + */
> +struct cgroup_pids {
> +       /* The node in cgrp->pids_list */
> +       struct list_head list;
> +       /* The cgroup those pids belong to */
> +       struct cgroup *cgrp;
> +       /* The namepsace those pids belong to */
> +       struct pid_namespace *pid_ns;
> +       /* Array of process ids in the cgroup */
> +       pid_t *tasks_pids;
> +       /* How many files are using the this tasks_pids array */
> +       int pids_use_count;
> +       /* Length of the current tasks_pids array */
> +       int pids_length;
> +};

Maybe lose the unnecessary "pids" suffices and prefixes in this structure?

>
> @@ -2324,6 +2352,8 @@ static struct file_operations cgroup_tasks_operations = {
>  static int cgroup_tasks_open(struct inode *unused, struct file *file)
>  {
>        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
> +       struct pid_namespace *pid_ns = task_active_pid_ns(current);
> +       struct cgroup_pids *cp;
>        pid_t *pidarray;
>        int npids;
>        int retval;
> @@ -2350,20 +2380,36 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
>         * array if necessary
>         */
>        down_write(&cgrp->pids_mutex);
> -       kfree(cgrp->tasks_pids);
> -       cgrp->tasks_pids = pidarray;
> -       cgrp->pids_length = npids;
> -       cgrp->pids_use_count++;
> +
> +       list_for_each_entry(cp, &cgrp->pids_list, list) {
> +               if (pid_ns == cp->pid_ns)
> +                       goto found;
> +       }
> +
> +       cp = kzalloc(sizeof(*cp), GFP_KERNEL);
> +       if (!cp) {
> +               up_write(&cgrp->pids_mutex);
> +               kfree(pidarray);
> +               return -ENOMEM;
> +       }
> +       cp->cgrp = cgrp;
> +       cp->pid_ns = pid_ns;

You're storing an uncounted reference to the pid ns here - there's no
guarantee that the pid_ns will outlive the open file.

Paul
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/