Re: [PATCH 1/6] memcg: add mem_cgroup parameter to mem_cgroup_page_stat()
From: Minchan Kim
Date: Tue Nov 09 2010 - 17:53:53 EST
On Tue, Nov 9, 2010 at 6:24 PM, Greg Thelen <gthelen@xxxxxxxxxx> wrote:
> This new parameter can be used to query dirty memory usage
> from a given memcg rather than the current task's memcg.
>
> Signed-off-by: Greg Thelen <gthelen@xxxxxxxxxx>
> ---
> include/linux/memcontrol.h | 6 ++++--
> mm/memcontrol.c | 37 +++++++++++++++++++++----------------
> mm/page-writeback.c | 2 +-
> 3 files changed, 26 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 7a3d915..89a9278 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -157,7 +157,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
> bool mem_cgroup_has_dirty_limit(void);
> bool mem_cgroup_dirty_info(unsigned long sys_available_mem,
> struct dirty_info *info);
> -long mem_cgroup_page_stat(enum mem_cgroup_nr_pages_item item);
> +long mem_cgroup_page_stat(struct mem_cgroup *mem,
> + enum mem_cgroup_nr_pages_item item);
>
> unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
> gfp_t gfp_mask);
> @@ -351,7 +352,8 @@ static inline bool mem_cgroup_dirty_info(unsigned long sys_available_mem,
> return false;
> }
>
> -static inline long mem_cgroup_page_stat(enum mem_cgroup_nr_pages_item item)
> +static inline long mem_cgroup_page_stat(struct mem_cgroup *mem,
> + enum mem_cgroup_nr_pages_item item)
> {
> return -ENOSYS;
> }
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d8a06d6..1bff7cf 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1245,22 +1245,20 @@ bool mem_cgroup_dirty_info(unsigned long sys_available_mem,
> unsigned long available_mem;
> struct mem_cgroup *memcg;
> long value;
> + bool valid = false;
>
> if (mem_cgroup_disabled())
> return false;
>
> rcu_read_lock();
> memcg = mem_cgroup_from_task(current);
> - if (!__mem_cgroup_has_dirty_limit(memcg)) {
> - rcu_read_unlock();
> - return false;
> - }
> + if (!__mem_cgroup_has_dirty_limit(memcg))
> + goto done;
> __mem_cgroup_dirty_param(&dirty_param, memcg);
> - rcu_read_unlock();
>
> - value = mem_cgroup_page_stat(MEMCG_NR_DIRTYABLE_PAGES);
> + value = mem_cgroup_page_stat(memcg, MEMCG_NR_DIRTYABLE_PAGES);
> if (value < 0)
> - return false;
> + goto done;
>
> available_mem = min((unsigned long)value, sys_available_mem);
>
> @@ -1280,17 +1278,21 @@ bool mem_cgroup_dirty_info(unsigned long sys_available_mem,
> (dirty_param.dirty_background_ratio *
> available_mem) / 100;
>
> - value = mem_cgroup_page_stat(MEMCG_NR_RECLAIM_PAGES);
> + value = mem_cgroup_page_stat(memcg, MEMCG_NR_RECLAIM_PAGES);
> if (value < 0)
> - return false;
> + goto done;
> info->nr_reclaimable = value;
>
> - value = mem_cgroup_page_stat(MEMCG_NR_WRITEBACK);
> + value = mem_cgroup_page_stat(memcg, MEMCG_NR_WRITEBACK);
> if (value < 0)
> - return false;
> + goto done;
> info->nr_writeback = value;
>
> - return true;
> + valid = true;
> +
> +done:
> + rcu_read_unlock();
> + return valid;
> }
>
> static inline bool mem_cgroup_can_swap(struct mem_cgroup *memcg)
> @@ -1361,20 +1363,23 @@ memcg_hierarchical_free_pages(struct mem_cgroup *mem)
>
> /*
> * mem_cgroup_page_stat() - get memory cgroup file cache statistics
> - * @item: memory statistic item exported to the kernel
> + * @mem: optional memory cgroup to query. If NULL, use current task's
> + * cgroup.
> + * @item: memory statistic item exported to the kernel
> *
> * Return the accounted statistic value or negative value if current task is
> * root cgroup.
> */
> -long mem_cgroup_page_stat(enum mem_cgroup_nr_pages_item item)
> +long mem_cgroup_page_stat(struct mem_cgroup *mem,
> + enum mem_cgroup_nr_pages_item item)
> {
> struct mem_cgroup *iter;
> - struct mem_cgroup *mem;
> long value;
>
> get_online_cpus();
> rcu_read_lock();
> - mem = mem_cgroup_from_task(current);
> + if (!mem)
> + mem = mem_cgroup_from_task(current);
> if (__mem_cgroup_has_dirty_limit(mem)) {
> /*
> * If we're looking for dirtyable pages we need to evaluate
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index a477f59..dc3dbe3 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -135,7 +135,7 @@ static unsigned long dirty_writeback_pages(void)
> {
> unsigned long ret;
>
> - ret = mem_cgroup_page_stat(MEMCG_NR_DIRTY_WRITEBACK_PAGES);
> + ret = mem_cgroup_page_stat(NULL, MEMCG_NR_DIRTY_WRITEBACK_PAGES);
> if ((long)ret < 0)
> ret = global_page_state(NR_UNSTABLE_NFS) +
> global_page_state(NR_WRITEBACK);
> --
> 1.7.3.1
>
I didn't look at further patches so It might be changed.
Now all of caller of mem_cgroup_page_stat except only
dirty_writeback_pages hold rcu_read_lock.
And mem_cgroup_page_stat itself hold rcu_read_lock again.
Couldn't we remove duplicated rcu lock by adding rcu_read_lock in
dirty_writeback_pages for the consistency?
--
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/