Re: [RFC PATCH] cpuset: remove /proc/PID/cpuset

From: 许春光
Date: Thu Feb 29 2024 - 21:43:38 EST


Hi waiman:

Thanks for analyse in detail, we found some guy use /proc/PID/cpuset
to get the path and cause a live issue.

Thanks.

Waiman Long <longman@xxxxxxxxxx> 于2024年2月10日周六 12:34写道:
>
> On 2/2/24 05:49, brookxu.cn wrote:
> > From: Chunguang Xu <chunguang.xu@xxxxxxxxxx>
> >
> > Now we can get all cgroup paths from /proc/PID/cgroup for
> > a long time, so it maybe useless to keep /proc/PID/cpuset,
> > besides the path get from /proc/PID/cpuset is not consistent
> > with /proc/PID/cgroup in default hierarchy, so now we may
> > can safely remove /proc/PID/cpuset to avoid the mismatch.
>
> With cgroup v2, the value of /proc/PID/cgroup may not match
> /proc/PID/cpuset. That is the expected behavior due to the way cgroup v2
> works. For example,
>
> Root [CS] --> A [CS] --> B --> C where [CS] means cpuset controller is
> enabled at that cgroup.
>
> If a process P is in cgroup C, /proc/PID/cgroup will show /A/B/C.
> However, /proc/PID/cpuset will show /A because it is where the cpuset
> controller setting will apply to process P. Also removing an existing
> procfs file may break an existing application that relies on it. So we
> don't do that unless there is a strong reason to do so.
>
> NAK
>
> >
> > root@test:~# cat /proc/1186/cgroup
> > 0::/system.slice/lmeter.service
> >
> > root@test:~# cat /proc/1186/cpuset
> > /system.slice
> >
> > Signed-off-by: Chunguang Xu <chunguang.xu@xxxxxxxxxx>
> > ---
> > arch/mips/configs/sb1250_swarm_defconfig | 1 -
> > arch/sh/configs/sdk7786_defconfig | 1 -
> > arch/sh/configs/urquell_defconfig | 1 -
> > fs/proc/base.c | 6 ----
> > kernel/cgroup/cpuset.c | 40 ------------------------
> > 5 files changed, 49 deletions(-)
> >
> > diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig
> > index ce855b644bb0..2fbea9c604df 100644
> > --- a/arch/mips/configs/sb1250_swarm_defconfig
> > +++ b/arch/mips/configs/sb1250_swarm_defconfig
> > @@ -3,7 +3,6 @@ CONFIG_HIGH_RES_TIMERS=y
> > CONFIG_LOG_BUF_SHIFT=15
> > CONFIG_CGROUPS=y
> > CONFIG_CPUSETS=y
> > -# CONFIG_PROC_PID_CPUSET is not set
> > CONFIG_CGROUP_CPUACCT=y
> > CONFIG_NAMESPACES=y
> > CONFIG_RELAY=y
> > diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
> > index 7b427c17fbfe..6fe340d4860c 100644
> > --- a/arch/sh/configs/sdk7786_defconfig
> > +++ b/arch/sh/configs/sdk7786_defconfig
> > @@ -13,7 +13,6 @@ CONFIG_CGROUP_DEBUG=y
> > CONFIG_CGROUP_FREEZER=y
> > CONFIG_CGROUP_DEVICE=y
> > CONFIG_CPUSETS=y
> > -# CONFIG_PROC_PID_CPUSET is not set
> > CONFIG_CGROUP_CPUACCT=y
> > CONFIG_CGROUP_MEMCG=y
> > CONFIG_CGROUP_SCHED=y
> > diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig
> > index 445bb451a5ec..c960221a0549 100644
> > --- a/arch/sh/configs/urquell_defconfig
> > +++ b/arch/sh/configs/urquell_defconfig
> > @@ -11,7 +11,6 @@ CONFIG_CGROUP_DEBUG=y
> > CONFIG_CGROUP_FREEZER=y
> > CONFIG_CGROUP_DEVICE=y
> > CONFIG_CPUSETS=y
> > -# CONFIG_PROC_PID_CPUSET is not set
> > CONFIG_CGROUP_CPUACCT=y
> > CONFIG_CGROUP_MEMCG=y
> > CONFIG_CGROUP_SCHED=y
> > diff --git a/fs/proc/base.c b/fs/proc/base.c
> > index 98a031ac2648..8dcd23b9212a 100644
> > --- a/fs/proc/base.c
> > +++ b/fs/proc/base.c
> > @@ -3309,9 +3309,6 @@ static const struct pid_entry tgid_base_stuff[] = {
> > #ifdef CONFIG_LATENCYTOP
> > REG("latency", S_IRUGO, proc_lstats_operations),
> > #endif
> > -#ifdef CONFIG_PROC_PID_CPUSET
> > - ONE("cpuset", S_IRUGO, proc_cpuset_show),
> > -#endif
> > #ifdef CONFIG_CGROUPS
> > ONE("cgroup", S_IRUGO, proc_cgroup_show),
> > #endif
> > @@ -3658,9 +3655,6 @@ static const struct pid_entry tid_base_stuff[] = {
> > #ifdef CONFIG_LATENCYTOP
> > REG("latency", S_IRUGO, proc_lstats_operations),
> > #endif
> > -#ifdef CONFIG_PROC_PID_CPUSET
> > - ONE("cpuset", S_IRUGO, proc_cpuset_show),
> > -#endif
> > #ifdef CONFIG_CGROUPS
> > ONE("cgroup", S_IRUGO, proc_cgroup_show),
> > #endif
> > diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> > index ba36c073304a..908cd7e6efa8 100644
> > --- a/kernel/cgroup/cpuset.c
> > +++ b/kernel/cgroup/cpuset.c
> > @@ -5066,46 +5066,6 @@ void __cpuset_memory_pressure_bump(void)
> > rcu_read_unlock();
> > }
> >
> > -#ifdef CONFIG_PROC_PID_CPUSET
> > -/*
> > - * proc_cpuset_show()
> > - * - Print tasks cpuset path into seq_file.
> > - * - Used for /proc/<pid>/cpuset.
> > - * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
> > - * doesn't really matter if tsk->cpuset changes after we read it,
> > - * and we take cpuset_mutex, keeping cpuset_attach() from changing it
> > - * anyway.
> > - */
> > -int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
> > - struct pid *pid, struct task_struct *tsk)
> > -{
> > - char *buf;
> > - struct cgroup_subsys_state *css;
> > - int retval;
> > -
> > - retval = -ENOMEM;
> > - buf = kmalloc(PATH_MAX, GFP_KERNEL);
> > - if (!buf)
> > - goto out;
> > -
> > - css = task_get_css(tsk, cpuset_cgrp_id);
> > - retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
> > - current->nsproxy->cgroup_ns);
> > - css_put(css);
> > - if (retval == -E2BIG)
> > - retval = -ENAMETOOLONG;
> > - if (retval < 0)
> > - goto out_free;
> > - seq_puts(m, buf);
> > - seq_putc(m, '\n');
> > - retval = 0;
> > -out_free:
> > - kfree(buf);
> > -out:
> > - return retval;
> > -}
> > -#endif /* CONFIG_PROC_PID_CPUSET */
> > -
> > /* Display task mems_allowed in /proc/<pid>/status file. */
> > void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
> > {
>