Re: [PATCH 01/13] cgroup: improve css_from_dir() into css_tryget_from_dir()
From: Michal Hocko
Date: Wed Jan 29 2014 - 04:24:48 EST
On Tue 28-01-14 18:54:33, Tejun Heo wrote:
> css_from_dir() returns the matching css (cgroup_subsys_state) given a
> dentry and subsystem. The function doesn't pin the css before
> returning and requires the caller to be holding RCU read lock or
> cgroup_mutex and handling pinning on the caller side.
>
> Given that users of the function are likely to want to pin the
> returned css (both existing users do) and that getting and putting
> css's are very cheap, there's no reason for the interface to be tricky
> like this.
>
> Rename css_from_dir() to css_tryget_from_dir() and make it try to pin
> the found css and return it only if pinning succeeded. The callers
> are updated so that they no longer do RCU locking and pinning around
> the function and just use the returned css.
>
> This will also ease converting cgroup to kernfs.
>
> Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxx>
> Cc: Balbir Singh <bsingharora@xxxxxxxxx>
> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
Yes, this is handy.
For the memcg part:
Acked-by: Michal Hocko <mhocko@xxxxxxx>
Thanks!
> ---
> include/linux/cgroup.h | 4 ++--
> kernel/cgroup.c | 25 ++++++++++++++++---------
> kernel/events/core.c | 17 +----------------
> mm/memcontrol.c | 16 +++++++---------
> 4 files changed, 26 insertions(+), 36 deletions(-)
>
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 198c7fc..2255639 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -823,8 +823,8 @@ int css_scan_tasks(struct cgroup_subsys_state *css,
> int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
> int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
>
> -struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
> - struct cgroup_subsys *ss);
> +struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
> + struct cgroup_subsys *ss);
>
> #else /* !CONFIG_CGROUPS */
>
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 3d92fd0..823e250 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -4964,28 +4964,35 @@ static int __init cgroup_disable(char *str)
> __setup("cgroup_disable=", cgroup_disable);
>
> /**
> - * css_from_dir - get corresponding css from the dentry of a cgroup dir
> + * css_tryget_from_dir - get corresponding css from the dentry of a cgroup dir
> * @dentry: directory dentry of interest
> * @ss: subsystem of interest
> *
> - * Must be called under cgroup_mutex or RCU read lock. The caller is
> - * responsible for pinning the returned css if it needs to be accessed
> - * outside the critical section.
> + * If @dentry is a directory for a cgroup which has @ss enabled on it, try
> + * to get the corresponding css and return it. If such css doesn't exist
> + * or can't be pinned, an ERR_PTR value is returned.
> */
> -struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
> - struct cgroup_subsys *ss)
> +struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
> + struct cgroup_subsys *ss)
> {
> struct cgroup *cgrp;
> -
> - cgroup_assert_mutex_or_rcu_locked();
> + struct cgroup_subsys_state *css;
>
> /* is @dentry a cgroup dir? */
> if (!dentry->d_inode ||
> dentry->d_inode->i_op != &cgroup_dir_inode_operations)
> return ERR_PTR(-EBADF);
>
> + rcu_read_lock();
> +
> cgrp = __d_cgrp(dentry);
> - return cgroup_css(cgrp, ss) ?: ERR_PTR(-ENOENT);
> + css = cgroup_css(cgrp, ss);
> +
> + if (!css || !css_tryget(css))
> + css = ERR_PTR(-ENOENT);
> +
> + rcu_read_unlock();
> + return css;
> }
>
> /**
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 6490373..a3c3ab5 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -370,11 +370,6 @@ perf_cgroup_match(struct perf_event *event)
> event->cgrp->css.cgroup);
> }
>
> -static inline bool perf_tryget_cgroup(struct perf_event *event)
> -{
> - return css_tryget(&event->cgrp->css);
> -}
> -
> static inline void perf_put_cgroup(struct perf_event *event)
> {
> css_put(&event->cgrp->css);
> @@ -593,9 +588,7 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
> if (!f.file)
> return -EBADF;
>
> - rcu_read_lock();
> -
> - css = css_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
> + css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
> if (IS_ERR(css)) {
> ret = PTR_ERR(css);
> goto out;
> @@ -604,13 +597,6 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
> cgrp = container_of(css, struct perf_cgroup, css);
> event->cgrp = cgrp;
>
> - /* must be done before we fput() the file */
> - if (!perf_tryget_cgroup(event)) {
> - event->cgrp = NULL;
> - ret = -ENOENT;
> - goto out;
> - }
> -
> /*
> * all events in a group must monitor
> * the same cgroup because a task belongs
> @@ -621,7 +607,6 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
> ret = -EINVAL;
> }
> out:
> - rcu_read_unlock();
> fdput(f);
> return ret;
> }
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d4a8c40..ab4e412 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -6184,17 +6184,15 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
> * automatically removed on cgroup destruction but the removal is
> * asynchronous, so take an extra ref on @css.
> */
> - rcu_read_lock();
> -
> + cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent,
> + &memory_cgrp_subsys);
> ret = -EINVAL;
> - cfile_css = css_from_dir(cfile.file->f_dentry->d_parent,
> - &memory_cgrp_subsys);
> - if (cfile_css == css && css_tryget(css))
> - ret = 0;
> -
> - rcu_read_unlock();
> - if (ret)
> + if (IS_ERR(cfile_css))
> goto out_put_cfile;
> + if (cfile_css != css) {
> + css_put(cfile_css);
> + goto out_put_cfile;
> + }
>
> ret = event->register_event(memcg, event->eventfd, buffer);
> if (ret)
> --
> 1.8.5.3
>
--
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/