Re: [REVIEW][PATCH 2/4] proc: Implement /proc/thread-self to point at the directory of the current thread

From: Serge E. Hallyn
Date: Wed Aug 06 2014 - 10:35:11 EST


Quoting Eric W. Biederman (ebiederm@xxxxxxxxxxxx):
>
> /proc/thread-self is derived from /proc/self. /proc/thread-self
> points to the directory in proc containing information about the
> current thread.
>
> This functionality has been missing for a long time, and is tricky to
> implement in userspace as gettid() is not exported by glibc. More
> importantly this allows fixing defects in /proc/mounts and /proc/net
> where in a threaded application today they wind up being empty files
> when only the initial pthread has exited, causing problems for other
> threads.
>
> Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>

Hi Eric,

I've not had a chance to test these, but apart from two trivial
comments below these look good to me, and I appreciate the feature.
So with the two fixes (if needed),

Acked-by: Serge Hallyn <serge.hallyn@xxxxxxxxxxxxx>

> ---
> fs/proc/Makefile | 1 +
> fs/proc/base.c | 15 +++++---
> fs/proc/inode.c | 7 +++-
> fs/proc/internal.h | 6 +++
> fs/proc/root.c | 3 ++
> fs/proc/thread_self.c | 85 +++++++++++++++++++++++++++++++++++++++++++
> include/linux/pid_namespace.h | 1 +
> 7 files changed, 112 insertions(+), 6 deletions(-)
> create mode 100644 fs/proc/thread_self.c
>
> diff --git a/fs/proc/Makefile b/fs/proc/Makefile
> index 239493ec718e..7151ea428041 100644
> --- a/fs/proc/Makefile
> +++ b/fs/proc/Makefile
> @@ -23,6 +23,7 @@ proc-y += version.o
> proc-y += softirqs.o
> proc-y += namespaces.o
> proc-y += self.o
> +proc-y += thread_self.o
> proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
> proc-$(CONFIG_NET) += proc_net.o
> proc-$(CONFIG_PROC_KCORE) += kcore.o
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index ed34e405c6b9..0131156ce7c9 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2847,7 +2847,7 @@ retry:
> return iter;
> }
>
> -#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1)
> +#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
>
> /* for the /proc/ directory itself, after non-process stuff has been done */
> int proc_pid_readdir(struct file *file, struct dir_context *ctx)
> @@ -2859,14 +2859,19 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
> if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
> return 0;
>
> - if (pos == TGID_OFFSET - 1) {
> + if (pos == TGID_OFFSET - 2) {
> struct inode *inode = ns->proc_self->d_inode;
> if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
> return 0;
> - iter.tgid = 0;
> - } else {
> - iter.tgid = pos - TGID_OFFSET;
> + ctx->pos = pos = pos + 1;
> + }
> + if (pos == TGID_OFFSET - 1) {
> + struct inode *inode = ns->proc_thread_self->d_inode;
> + if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
> + return 0;
> + ctx->pos = pos = pos + 1;
> }
> + iter.tgid = pos - TGID_OFFSET;
> iter.task = NULL;
> for (iter = next_tgid(ns, iter);
> iter.task;
> diff --git a/fs/proc/inode.c b/fs/proc/inode.c
> index 0adbc02d60e3..333080d7a671 100644
> --- a/fs/proc/inode.c
> +++ b/fs/proc/inode.c
> @@ -442,6 +442,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
> int proc_fill_super(struct super_block *s)
> {
> struct inode *root_inode;
> + int ret;
>
> s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
> s->s_blocksize = 1024;
> @@ -463,5 +464,9 @@ int proc_fill_super(struct super_block *s)
> return -ENOMEM;
> }
>
> - return proc_setup_self(s);
> + ret = proc_setup_self(s);
> + if (ret) {
> + return ret;
> + }
> + return proc_setup_thread_self(s);
> }
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index 3ab6d14e71c5..ee04619173b2 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -234,6 +234,12 @@ static inline int proc_net_init(void) { return 0; }
> extern int proc_setup_self(struct super_block *);
>
> /*
> + * proc_thread_self.c
> + */
> +extern int proc_setup_thread_self(struct super_block *);
> +extern void proc_thread_self_init(void);
> +
> +/*
> * proc_sysctl.c
> */
> #ifdef CONFIG_PROC_SYSCTL
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index 5dbadecb234d..48f1c03bc7ed 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -149,6 +149,8 @@ static void proc_kill_sb(struct super_block *sb)
> ns = (struct pid_namespace *)sb->s_fs_info;
> if (ns->proc_self)
> dput(ns->proc_self);
> + if (ns->proc_thread_self)
> + dput(ns->proc_thread_self);
> kill_anon_super(sb);
> put_pid_ns(ns);
> }
> @@ -170,6 +172,7 @@ void __init proc_root_init(void)
> return;
>
> proc_self_init();
> + proc_thread_self_init();
> proc_symlink("mounts", NULL, "self/mounts");
>
> proc_net_init();
> diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
> new file mode 100644
> index 000000000000..59075b509df3
> --- /dev/null
> +++ b/fs/proc/thread_self.c
> @@ -0,0 +1,85 @@
> +#include <linux/sched.h>
> +#include <linux/namei.h>
> +#include <linux/slab.h>
> +#include <linux/pid_namespace.h>
> +#include "internal.h"
> +
> +/*
> + * /proc/thread_self:
> + */
> +static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
> + int buflen)
> +{
> + struct pid_namespace *ns = dentry->d_sb->s_fs_info;
> + pid_t tgid = task_tgid_nr_ns(current, ns);
> + pid_t pid = task_pid_nr_ns(current, ns);
> + char tmp[PROC_NUMBUF + 6 + PROC_NUMBUF];

In the extreme case, aren't you missing space for the trailing \0?
(Unless PROC_NUMBUF already includes that.)

> + if (!pid)
> + return -ENOENT;
> + sprintf(tmp, "%d/task/%d", tgid, pid);
> + return readlink_copy(buffer, buflen, tmp);
> +}
> +
> +static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd)
> +{
> + struct pid_namespace *ns = dentry->d_sb->s_fs_info;
> + pid_t tgid = task_tgid_nr_ns(current, ns);
> + pid_t pid = task_pid_nr_ns(current, ns);
> + char *name = ERR_PTR(-ENOENT);
> + if (pid) {
> + name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);

Same here.

> + if (!name)
> + name = ERR_PTR(-ENOMEM);
> + else
> + sprintf(name, "%d/task/%d", tgid, pid);
> + }
> + nd_set_link(nd, name);
> + return NULL;
> +}
> +
> +static const struct inode_operations proc_thread_self_inode_operations = {
> + .readlink = proc_thread_self_readlink,
> + .follow_link = proc_thread_self_follow_link,
> + .put_link = kfree_put_link,
> +};
> +
> +static unsigned thread_self_inum;
> +
> +int proc_setup_thread_self(struct super_block *s)
> +{
> + struct inode *root_inode = s->s_root->d_inode;
> + struct pid_namespace *ns = s->s_fs_info;
> + struct dentry *thread_self;
> +
> + mutex_lock(&root_inode->i_mutex);
> + thread_self = d_alloc_name(s->s_root, "thread-self");
> + if (thread_self) {
> + struct inode *inode = new_inode_pseudo(s);
> + if (inode) {
> + inode->i_ino = thread_self_inum;
> + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
> + inode->i_mode = S_IFLNK | S_IRWXUGO;
> + inode->i_uid = GLOBAL_ROOT_UID;
> + inode->i_gid = GLOBAL_ROOT_GID;
> + inode->i_op = &proc_thread_self_inode_operations;
> + d_add(thread_self, inode);
> + } else {
> + dput(thread_self);
> + thread_self = ERR_PTR(-ENOMEM);
> + }
> + } else {
> + thread_self = ERR_PTR(-ENOMEM);
> + }
> + mutex_unlock(&root_inode->i_mutex);
> + if (IS_ERR(thread_self)) {
> + pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
> + return PTR_ERR(thread_self);
> + }
> + ns->proc_thread_self = thread_self;
> + return 0;
> +}
> +
> +void __init proc_thread_self_init(void)
> +{
> + proc_alloc_inum(&thread_self_inum);
> +}
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index 7246ef3d4455..1997ffc295a7 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -33,6 +33,7 @@ struct pid_namespace {
> #ifdef CONFIG_PROC_FS
> struct vfsmount *proc_mnt;
> struct dentry *proc_self;
> + struct dentry *proc_thread_self;
> #endif
> #ifdef CONFIG_BSD_PROCESS_ACCT
> struct bsd_acct_struct *bacct;
> --
> 1.9.1
>
> _______________________________________________
> Containers mailing list
> Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
> https://lists.linuxfoundation.org/mailman/listinfo/containers
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/