Re: [PATCH -V7 07/14] mm/page_cgroup: Make page_cgroup point to thecgroup rather than the mem_cgroup

From: Kamezawa Hiroyuki
Date: Mon Jun 04 2012 - 21:47:07 EST


(2012/05/30 23:38), Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V"<aneesh.kumar@xxxxxxxxxxxxxxxxxx>
>
> We will use it later to make page_cgroup track the hugetlb cgroup information.
>
> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@xxxxxxxxxxxxxxxxxx>
> ---
> include/linux/mmzone.h | 2 +-
> include/linux/page_cgroup.h | 8 ++++----
> init/Kconfig | 4 ++++
> mm/Makefile | 3 ++-
> mm/memcontrol.c | 42 +++++++++++++++++++++++++-----------------
> 5 files changed, 36 insertions(+), 23 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 2427706..2483cc5 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1052,7 +1052,7 @@ struct mem_section {
>
> /* See declaration of similar field in struct zone */
> unsigned long *pageblock_flags;
> -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> +#ifdef CONFIG_PAGE_CGROUP
> /*
> * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
> * section. (see memcontrol.h/page_cgroup.h about this.)
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index a88cdba..7bbfe37 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -12,7 +12,7 @@ enum {
> #ifndef __GENERATING_BOUNDS_H
> #include<generated/bounds.h>
>
> -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> +#ifdef CONFIG_PAGE_CGROUP
> #include<linux/bit_spinlock.h>
>
> /*
> @@ -24,7 +24,7 @@ enum {
> */
> struct page_cgroup {
> unsigned long flags;
> - struct mem_cgroup *mem_cgroup;
> + struct cgroup *cgroup;
> };
>

This patch seems very bad.

- What is the performance impact to memcg ? Doesn't this add extra overheads
to memcg lookup ?
- Hugetlb reuquires much more smaller number of tracking information rather
than memcg requires. I guess you can record the information into page->private
if you want.
- This may prevent us from the work 'reducing size of page_cgroup'

So, strong Nack to this. I guess you can use page->private or some entries in
struct page, you have many pages per accounting units. Please make an effort
to avoid using page_cgroup.

Thanks,
-Kame



> void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
> @@ -82,7 +82,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
> bit_spin_unlock(PCG_LOCK,&pc->flags);
> }
>
> -#else /* CONFIG_CGROUP_MEM_RES_CTLR */
> +#else /* CONFIG_PAGE_CGROUP */
> struct page_cgroup;
>
> static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
> @@ -102,7 +102,7 @@ static inline void __init page_cgroup_init_flatmem(void)
> {
> }
>
> -#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
> +#endif /* CONFIG_PAGE_CGROUP */
>
> #include<linux/swap.h>
>
> diff --git a/init/Kconfig b/init/Kconfig
> index 81816b8..1363203 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -687,10 +687,14 @@ config RESOURCE_COUNTERS
> This option enables controller independent resource accounting
> infrastructure that works with cgroups.
>
> +config PAGE_CGROUP
> + bool
> +
> config CGROUP_MEM_RES_CTLR
> bool "Memory Resource Controller for Control Groups"
> depends on RESOURCE_COUNTERS
> select MM_OWNER
> + select PAGE_CGROUP
> help
> Provides a memory resource controller that manages both anonymous
> memory and page cache. (See Documentation/cgroups/memory.txt)
> diff --git a/mm/Makefile b/mm/Makefile
> index a156285..a70f9a9 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -47,7 +47,8 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
> obj-$(CONFIG_MIGRATION) += migrate.o
> obj-$(CONFIG_QUICKLIST) += quicklist.o
> obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
> -obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
> +obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
> +obj-$(CONFIG_PAGE_CGROUP) += page_cgroup.o
> obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
> obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
> obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index ac35bcc..6df019b 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -864,6 +864,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
>
> struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
> {
> + if (!cont)
> + return NULL;
> return container_of(cgroup_subsys_state(cont,
> mem_cgroup_subsys_id), struct mem_cgroup,
> css);
> @@ -1097,7 +1099,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
> return&zone->lruvec;
>
> pc = lookup_page_cgroup(page);
> - memcg = pc->mem_cgroup;
> + memcg = mem_cgroup_from_cont(pc->cgroup);
>
> /*
> * Surreptitiously switch any uncharged offlist page to root:
> @@ -1108,8 +1110,10 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
> * under page_cgroup lock: between them, they make all uses
> * of pc->mem_cgroup safe.
> */
> - if (!PageLRU(page)&& !PageCgroupUsed(pc)&& memcg != root_mem_cgroup)
> - pc->mem_cgroup = memcg = root_mem_cgroup;
> + if (!PageLRU(page)&& !PageCgroupUsed(pc)&& memcg != root_mem_cgroup) {
> + memcg = root_mem_cgroup;
> + pc->cgroup = memcg->css.cgroup;
> + }
>
> mz = page_cgroup_zoneinfo(memcg, page);
> return&mz->lruvec;
> @@ -1889,12 +1893,14 @@ static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask,
> void __mem_cgroup_begin_update_page_stat(struct page *page,
> bool *locked, unsigned long *flags)
> {
> + struct cgroup *cgroup;
> struct mem_cgroup *memcg;
> struct page_cgroup *pc;
>
> pc = lookup_page_cgroup(page);
> again:
> - memcg = pc->mem_cgroup;
> + cgroup = pc->cgroup;
> + memcg = mem_cgroup_from_cont(cgroup);
> if (unlikely(!memcg || !PageCgroupUsed(pc)))
> return;
> /*
> @@ -1907,7 +1913,7 @@ again:
> return;
>
> move_lock_mem_cgroup(memcg, flags);
> - if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
> + if (cgroup != pc->cgroup || !PageCgroupUsed(pc)) {
> move_unlock_mem_cgroup(memcg, flags);
> goto again;
> }
> @@ -1923,7 +1929,7 @@ void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
> * lock is held because a routine modifies pc->mem_cgroup
> * should take move_lock_page_cgroup().
> */
> - move_unlock_mem_cgroup(pc->mem_cgroup, flags);
> + move_unlock_mem_cgroup(mem_cgroup_from_cont(pc->cgroup), flags);
> }
>
> void mem_cgroup_update_page_stat(struct page *page,
> @@ -1936,7 +1942,7 @@ void mem_cgroup_update_page_stat(struct page *page,
> if (mem_cgroup_disabled())
> return;
>
> - memcg = pc->mem_cgroup;
> + memcg = mem_cgroup_from_cont(pc->cgroup);
> if (unlikely(!memcg || !PageCgroupUsed(pc)))
> return;
>
> @@ -2444,7 +2450,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
> pc = lookup_page_cgroup(page);
> lock_page_cgroup(pc);
> if (PageCgroupUsed(pc)) {
> - memcg = pc->mem_cgroup;
> + memcg = mem_cgroup_from_cont(pc->cgroup);
> if (memcg&& !css_tryget(&memcg->css))
> memcg = NULL;
> } else if (PageSwapCache(page)) {
> @@ -2491,14 +2497,15 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
> zone = page_zone(page);
> spin_lock_irq(&zone->lru_lock);
> if (PageLRU(page)) {
> - lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
> + lruvec = mem_cgroup_zone_lruvec(zone,
> + mem_cgroup_from_cont(pc->cgroup));
> ClearPageLRU(page);
> del_page_from_lru_list(page, lruvec, page_lru(page));
> was_on_lru = true;
> }
> }
>
> - pc->mem_cgroup = memcg;
> + pc->cgroup = memcg->css.cgroup;
> /*
> * We access a page_cgroup asynchronously without lock_page_cgroup().
> * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
> @@ -2511,7 +2518,8 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
>
> if (lrucare) {
> if (was_on_lru) {
> - lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
> + lruvec = mem_cgroup_zone_lruvec(zone,
> + mem_cgroup_from_cont(pc->cgroup));
> VM_BUG_ON(PageLRU(page));
> SetPageLRU(page);
> add_page_to_lru_list(page, lruvec, page_lru(page));
> @@ -2601,7 +2609,7 @@ static int mem_cgroup_move_account(struct page *page,
> lock_page_cgroup(pc);
>
> ret = -EINVAL;
> - if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
> + if (!PageCgroupUsed(pc) || pc->cgroup != from->css.cgroup)
> goto unlock;
>
> move_lock_mem_cgroup(from,&flags);
> @@ -2616,7 +2624,7 @@ static int mem_cgroup_move_account(struct page *page,
> mem_cgroup_charge_statistics(from, anon, -nr_pages);
>
> /* caller should have done css_get */
> - pc->mem_cgroup = to;
> + pc->cgroup = to->css.cgroup;
> mem_cgroup_charge_statistics(to, anon, nr_pages);
> /*
> * We charges against "to" which may not have any tasks. Then, "to"
> @@ -2937,7 +2945,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
>
> lock_page_cgroup(pc);
>
> - memcg = pc->mem_cgroup;
> + memcg = mem_cgroup_from_cont(pc->cgroup);
>
> if (!PageCgroupUsed(pc))
> goto unlock_out;
> @@ -3183,7 +3191,7 @@ int mem_cgroup_prepare_migration(struct page *page,
> pc = lookup_page_cgroup(page);
> lock_page_cgroup(pc);
> if (PageCgroupUsed(pc)) {
> - memcg = pc->mem_cgroup;
> + memcg = mem_cgroup_from_cont(pc->cgroup);
> css_get(&memcg->css);
> /*
> * At migrating an anonymous page, its mapcount goes down
> @@ -3328,7 +3336,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
> /* fix accounting on old pages */
> lock_page_cgroup(pc);
> if (PageCgroupUsed(pc)) {
> - memcg = pc->mem_cgroup;
> + memcg = mem_cgroup_from_cont(pc->cgroup);
> mem_cgroup_charge_statistics(memcg, false, -1);
> ClearPageCgroupUsed(pc);
> }
> @@ -5135,7 +5143,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
> * mem_cgroup_move_account() checks the pc is valid or not under
> * the lock.
> */
> - if (PageCgroupUsed(pc)&& pc->mem_cgroup == mc.from) {
> + if (PageCgroupUsed(pc)&& pc->cgroup == mc.from->css.cgroup) {
> ret = MC_TARGET_PAGE;
> if (target)
> target->page = page;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/