Re: [PATCH v6 07/19] mm: memcg/slab: allocate obj_cgroups for non-root slab pages

From: Shakeel Butt
Date: Fri Jun 19 2020 - 20:32:01 EST


On Fri, Jun 19, 2020 at 5:25 PM Roman Gushchin <guro@xxxxxx> wrote:
>
> On Fri, Jun 19, 2020 at 09:36:16AM -0700, Shakeel Butt wrote:
> > On Mon, Jun 8, 2020 at 4:07 PM Roman Gushchin <guro@xxxxxx> wrote:
> > >
> > > Allocate and release memory to store obj_cgroup pointers for each
> > > non-root slab page. Reuse page->mem_cgroup pointer to store a pointer
> > > to the allocated space.
> > >
> > > To distinguish between obj_cgroups and memcg pointers in case
> > > when it's not obvious which one is used (as in page_cgroup_ino()),
> > > let's always set the lowest bit in the obj_cgroup case.
> > >
> >
> > I think the commit message should talk about the potential overhead
> > (i.e. an extra pointer for each object) along with the justifications
> > (i.e. less internal fragmentation and potentially more savings than
> > the overhead).
>
> How about adding the following chunk? I don't like forward links in
> commit messages, so maybe it should go into the cover letter instead?
>
> This commit temporarily increases the memory footprint of kernel memory
> accounting: to store obj_cgroup pointers, an extra pointer has to be kept
> for each allocated object. However, the following patches in the series
> will enable sharing of slab pages between memory cgroups, which will
> dramatically improve total slab utilization, so the final memory
> footprint will be significantly smaller than before.
>

This looks good to me.
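
As an aside for readers following the thread: the scheme described in the
quoted commit message boils down to reusing the lowest bit of the shared
page->mem_cgroup / page->obj_cgroups word as a type tag (both pointers are
word-aligned, so bit 0 is always free). A minimal standalone sketch, with
illustrative helper names that are not part of the patch:

	struct mem_cgroup;
	struct obj_cgroup;

	/* Tag an obj_cgroups vector before storing it in the shared field:
	 * bit 0 set means "obj_cgroup vector", not "mem_cgroup pointer". */
	static inline unsigned long objcgs_encode(struct obj_cgroup **vec)
	{
		return (unsigned long)vec | 0x1UL;
	}

	/* True if the stored word holds a tagged obj_cgroups vector. */
	static inline bool objcgs_is_tagged(unsigned long word)
	{
		return word & 0x1UL;
	}

	/* Strip the tag to recover the vector (mirrors page_obj_cgroups()). */
	static inline struct obj_cgroup **objcgs_decode(unsigned long word)
	{
		return (struct obj_cgroup **)(word & ~0x1UL);
	}

page_cgroup_ino() in the hunk below is the consumer side of this: it checks
the tag bit and treats a tagged word as "no single owning memcg".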

> >
> > > Signed-off-by: Roman Gushchin <guro@xxxxxx>
> > > Reviewed-by: Vlastimil Babka <vbabka@xxxxxxx>
> > > ---
> > > include/linux/mm_types.h | 5 +++-
> > > include/linux/slab_def.h | 6 +++++
> > > include/linux/slub_def.h | 5 ++++
> > > mm/memcontrol.c | 17 +++++++++++---
> > > mm/slab.h | 49 ++++++++++++++++++++++++++++++++++++++++
> > > 5 files changed, 78 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > > index 64ede5f150dc..0277fbab7c93 100644
> > > --- a/include/linux/mm_types.h
> > > +++ b/include/linux/mm_types.h
> > > @@ -198,7 +198,10 @@ struct page {
> > > atomic_t _refcount;
> > >
> > > #ifdef CONFIG_MEMCG
> > > - struct mem_cgroup *mem_cgroup;
> > > + union {
> > > + struct mem_cgroup *mem_cgroup;
> > > + struct obj_cgroup **obj_cgroups;
> > > + };
> > > #endif
> > >
> > > /*
> > > diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
> > > index abc7de77b988..ccda7b9669a5 100644
> > > --- a/include/linux/slab_def.h
> > > +++ b/include/linux/slab_def.h
> > > @@ -114,4 +114,10 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
> > > return reciprocal_divide(offset, cache->reciprocal_buffer_size);
> > > }
> > >
> > > +static inline int objs_per_slab_page(const struct kmem_cache *cache,
> > > + const struct page *page)
> > > +{
> > > + return cache->num;
> > > +}
> > > +
> > > #endif /* _LINUX_SLAB_DEF_H */
> > > diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
> > > index 30e91c83d401..f87302dcfe8c 100644
> > > --- a/include/linux/slub_def.h
> > > +++ b/include/linux/slub_def.h
> > > @@ -198,4 +198,9 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
> > > return __obj_to_index(cache, page_address(page), obj);
> > > }
> > >
> > > +static inline int objs_per_slab_page(const struct kmem_cache *cache,
> > > + const struct page *page)
> > > +{
> > > + return page->objects;
> > > +}
> > > #endif /* _LINUX_SLUB_DEF_H */
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 7ff66275966c..2020c7542aa1 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -569,10 +569,21 @@ ino_t page_cgroup_ino(struct page *page)
> > > unsigned long ino = 0;
> > >
> > > rcu_read_lock();
> > > - if (PageSlab(page) && !PageTail(page))
> > > + if (PageSlab(page) && !PageTail(page)) {
> > > memcg = memcg_from_slab_page(page);
> > > - else
> > > - memcg = READ_ONCE(page->mem_cgroup);
> > > + } else {
> > > + memcg = page->mem_cgroup;
> > > +
> > > + /*
> > > + * The lowest bit set means that memcg isn't a valid
> > > + * memcg pointer, but an obj_cgroups pointer.
> > > + * In this case the page is shared and doesn't belong
> > > + * to any specific memory cgroup.
> > > + */
> > > + if ((unsigned long) memcg & 0x1UL)
> > > + memcg = NULL;
> > > + }
> > > +
> > > while (memcg && !(memcg->css.flags & CSS_ONLINE))
> > > memcg = parent_mem_cgroup(memcg);
> > > if (memcg)
> > > diff --git a/mm/slab.h b/mm/slab.h
> > > index 8a574d9361c1..a1633ea15fbf 100644
> > > --- a/mm/slab.h
> > > +++ b/mm/slab.h
> > > @@ -319,6 +319,18 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
> > > return s->memcg_params.root_cache;
> > > }
> > >
> > > +static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
> > > +{
> > > + /*
> > > + * page->mem_cgroup and page->obj_cgroups are sharing the same
> > > + * space. To distinguish between them in case we don't know for sure
> > > + * that the page is a slab page (e.g. page_cgroup_ino()), let's
> > > + * always set the lowest bit of obj_cgroups.
> > > + */
> > > + return (struct obj_cgroup **)
> > > + ((unsigned long)page->obj_cgroups & ~0x1UL);
> > > +}
> > > +
> > > /*
> > > * Expects a pointer to a slab page. Please note, that PageSlab() check
> > > * isn't sufficient, as it returns true also for tail compound slab pages,
> > > @@ -406,6 +418,26 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
> > > percpu_ref_put_many(&s->memcg_params.refcnt, nr_pages);
> > > }
> > >
> > > +static inline int memcg_alloc_page_obj_cgroups(struct page *page,
> > > + struct kmem_cache *s, gfp_t gfp)
> > > +{
> > > + unsigned int objects = objs_per_slab_page(s, page);
> > > + void *vec;
> > > +
> > > + vec = kcalloc(objects, sizeof(struct obj_cgroup *), gfp);
> >
> > Should the above allocation be on the same node as the page?
>
> Yeah, it's a clever idea. The following patch should do the trick.
> Andrew, can you, please, squash this in?
>
> Thank you!
>
>

Reviewed-by: Shakeel Butt <shakeelb@xxxxxxxxxx>

> diff --git a/mm/slab.h b/mm/slab.h
> index 0a31600a0f5c..2a036eefbd7e 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -233,7 +233,8 @@ static inline int memcg_alloc_page_obj_cgroups(struct page *page,
> unsigned int objects = objs_per_slab_page(s, page);
> void *vec;
>
> - vec = kcalloc(objects, sizeof(struct obj_cgroup *), gfp);
> + vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
> + page_to_nid(page));
> if (!vec)
> return -ENOMEM;
>
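
With the squashed change applied, and assuming the rest of
memcg_alloc_page_obj_cgroups() stays as in the hunk quoted earlier, the
function would look roughly like this; the line that stores the tagged
vector is not visible in the quoted context, so it is shown here only as
an illustration:

	static inline int memcg_alloc_page_obj_cgroups(struct page *page,
						       struct kmem_cache *s, gfp_t gfp)
	{
		unsigned int objects = objs_per_slab_page(s, page);
		void *vec;

		/* Allocate the obj_cgroup pointer vector on the same NUMA node
		 * as the slab page, so later accesses stay node-local. */
		vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
				   page_to_nid(page));
		if (!vec)
			return -ENOMEM;

		/* Store the vector with the lowest bit set, so readers such as
		 * page_cgroup_ino() can tell it apart from a mem_cgroup pointer. */
		page->obj_cgroups = (struct obj_cgroup **)((unsigned long)vec | 0x1UL);

		return 0;
	}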