Re: [RFC PATCH v2] taskstats: add /proc/taskstats to fetch pid/tgid status

From: Balbir Singh
Date: Wed Jan 27 2021 - 06:11:37 EST


On Mon, Dec 28, 2020 at 10:10:03PM +0800, Weiping Zhang wrote:
> Hi David,
>
> Could you help review this patch ?
>
> thanks

I've got it on my review list, thanks for the ping!
You should hear back from me soon.

Balbir Singh.

>
> On Fri, Dec 18, 2020 at 1:24 AM Weiping Zhang
> <zhangweiping@xxxxxxxxxxxxxx> wrote:
> >
> > If a program needs monitor lots of process's status, it needs two
> > syscalls for every process. The first one is telling kernel which
> > pid/tgid should be monitored by send a command(write socket) to kernel.
> > The second one is read the statistics by read socket. This patch add
> > a new interface /proc/taskstats to reduce two syscalls to one ioctl.
> > The user just set the target pid/tgid to the struct taskstats.ac_pid,
> > then kernel will collect statistics for that pid/tgid.
> >
> > Signed-off-by: Weiping Zhang <zhangweiping@xxxxxxxxxxxxxx>
> > ---
> > Changes since v1:
> > * use /proc/taskstats instead of /dev/taskstats
> >
> > include/uapi/linux/taskstats.h | 5 +++
> > kernel/taskstats.c | 73 +++++++++++++++++++++++++++++++++-
> > 2 files changed, 77 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
> > index ccbd08709321..077eab84c77e 100644
> > --- a/include/uapi/linux/taskstats.h
> > +++ b/include/uapi/linux/taskstats.h
> > @@ -214,6 +214,11 @@ enum {
> >
> > #define TASKSTATS_CMD_ATTR_MAX (__TASKSTATS_CMD_ATTR_MAX - 1)
> >
> > +/* ioctl command */
> > +#define TASKSTATS_IOC_ATTR_PID _IO('T', TASKSTATS_CMD_ATTR_PID)
> > +#define TASKSTATS_IOC_ATTR_TGID _IO('T', TASKSTATS_CMD_ATTR_TGID)
> > +
> > +
> > /* NETLINK_GENERIC related info */
> >
> > #define TASKSTATS_GENL_NAME "TASKSTATS"
> > diff --git a/kernel/taskstats.c b/kernel/taskstats.c
> > index a2802b6ff4bb..c0f9e2f2308b 100644
> > --- a/kernel/taskstats.c
> > +++ b/kernel/taskstats.c
> > @@ -21,6 +21,8 @@
> > #include <net/genetlink.h>
> > #include <linux/atomic.h>
> > #include <linux/sched/cputime.h>
> > +#include <linux/proc_fs.h>
> > +#include <linux/uio.h>
> >
> > /*
> > * Maximum length of a cpumask that can be specified in
> > @@ -28,6 +30,10 @@
> > */
> > #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS)
> >
> > +#ifdef CONFIG_PROC_FS
> > +#define PROC_TASKSTATS "taskstats"
> > +#endif
> > +
> > static DEFINE_PER_CPU(__u32, taskstats_seqnum);
> > static int family_registered;
> > struct kmem_cache *taskstats_cache;
> > @@ -666,6 +672,60 @@ static struct genl_family family __ro_after_init = {
> > .n_ops = ARRAY_SIZE(taskstats_ops),
> > };
> >
> > +#ifdef CONFIG_PROC_FS
> > +static long taskstats_ioctl_pid_tgid(unsigned long arg, bool tgid)
> > +{
> > + struct taskstats kstat;
> > + struct taskstats *ustat = (struct taskstats *)arg;
> > + u32 id;
> > + unsigned long offset = offsetof(struct taskstats, ac_pid);
> > + long ret;
> > +
> > + /* userspace set monitored pid/tgid to the field "ac_pid" */
> > + if (unlikely(copy_from_user(&id, (void *)(arg + offset), sizeof(u32))))
> > + return -EFAULT;
> > +
> > + if (tgid)
> > + ret = fill_stats_for_tgid(id, &kstat);
> > + else
> > + ret = fill_stats_for_pid(id, &kstat);
> > + if (ret < 0)
> > + return ret;
> > +
> > + if (unlikely(copy_to_user(ustat, &kstat, sizeof(struct taskstats))))
> > + return -EFAULT;
> > +
> > + return 0;
> > +}
> > +
> > +static long taskstats_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> > +{
> > + long ret;
> > +
> > + switch (cmd) {
> > + case TASKSTATS_IOC_ATTR_PID:
> > + ret = taskstats_ioctl_pid_tgid(arg, false);
> > + break;
> > + case TASKSTATS_IOC_ATTR_TGID:
> > + ret = taskstats_ioctl_pid_tgid(arg, true);
> > + break;
> > + default:
> > + ret = -EINVAL;
> > + break;
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +static const struct proc_ops taskstats_proc_ops = {
> > + .proc_ioctl = taskstats_ioctl,
> > +#ifdef CONFIG_COMPAT
> > + .proc_compat_ioctl = taskstats_ioctl,
> > +#endif
> > +};
> > +#endif
> > +
> > +
> > /* Needed early in initialization */
> > void __init taskstats_init_early(void)
> > {
> > @@ -682,9 +742,20 @@ static int __init taskstats_init(void)
> > {
> > int rc;
> >
> > +#ifdef CONFIG_PROC_FS
> > + if (!proc_create(PROC_TASKSTATS, 0, NULL, &taskstats_proc_ops)) {
> > + pr_err("failed to create /proc/%s\n", PROC_TASKSTATS);
> > + return -1;
> > + }
> > +#endif
> > +
> > rc = genl_register_family(&family);
> > - if (rc)
> > + if (rc) {
> > +#ifdef CONFIG_PROC_FS
> > + remove_proc_entry(PROC_TASKSTATS, NULL);
> > +#endif
> > return rc;
> > + }
> >
> > family_registered = 1;
> > pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
> > --
> > 2.17.2
> >