Re: [v3 PATCH 08/11] mm: vmscan: use per memcg nr_deferred of shrinker

From: Yang Shi
Date: Thu Jan 07 2021 - 12:35:05 EST


On Wed, Jan 6, 2021 at 4:17 PM Roman Gushchin <guro@xxxxxx> wrote:
>
> On Tue, Jan 05, 2021 at 02:58:14PM -0800, Yang Shi wrote:
> > Use per memcg's nr_deferred for memcg aware shrinkers. The shrinker's nr_deferred
> > will be used in the following cases:
> > 1. Non memcg aware shrinkers
> > 2. !CONFIG_MEMCG
>
> It's better to depend on CONFIG_MEMCG_KMEM rather than CONFIG_MEMCG.
> Without CONFIG_MEMCG_KMEM the kernel memory accounting is off, so
> per-memcg shrinkers do not make any sense. The same applies for many
> places in the patchset.

That is because kmem is not the only user of shrinkers. Deferred split
THPs are also split by a shrinker, and that shrinker is memcg aware as
well. And it is not the conventional "kmem".

Actually, it was CONFIG_MEMCG_KMEM before; it was changed to
CONFIG_MEMCG by the memcg-aware deferred split THP patches.

>
> PS I like this version of the patchset much more than the previous one,
> so it looks like it's going in the right direction.

Thanks a lot for the help from you folks.

>
> Thanks!
>
>
> > 3. memcg is disabled by boot parameter
> >
> > Signed-off-by: Yang Shi <shy828301@xxxxxxxxx>
> > ---
> > mm/vmscan.c | 81 +++++++++++++++++++++++++++++++++++++++++++++--------
> > 1 file changed, 69 insertions(+), 12 deletions(-)
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index 72259253e414..f20ed8e928c2 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -372,6 +372,27 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
> > up_write(&shrinker_rwsem);
> > }
> >
> > +static long count_nr_deferred_memcg(int nid, struct shrinker *shrinker,
> > + struct mem_cgroup *memcg)
> > +{
> > + struct memcg_shrinker_info *info;
> > +
> > + info = rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
> > + true);
> > + return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
> > +}
> > +
> > +static long set_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
> > + struct mem_cgroup *memcg)
> > +{
> > + struct memcg_shrinker_info *info;
> > +
> > + info = rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
> > + true);
> > +
> > + return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
> > +}
> > +
> > static bool cgroup_reclaim(struct scan_control *sc)
> > {
> > return sc->target_mem_cgroup;
> > @@ -410,6 +431,18 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
> > {
> > }
> >
> > +static long count_nr_deferred_memcg(int nid, struct shrinker *shrinker,
> > + struct mem_cgroup *memcg)
> > +{
> > + return 0;
> > +}
> > +
> > +static long set_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
> > + struct mem_cgroup *memcg)
> > +{
> > + return 0;
> > +}
> > +
> > static bool cgroup_reclaim(struct scan_control *sc)
> > {
> > return false;
> > @@ -421,6 +454,39 @@ static bool writeback_throttling_sane(struct scan_control *sc)
> > }
> > #endif
> >
> > +static long count_nr_deferred(struct shrinker *shrinker,
> > + struct shrink_control *sc)
> > +{
> > + int nid = sc->nid;
> > +
> > + if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
> > + nid = 0;
> > +
> > + if (sc->memcg &&
> > + (shrinker->flags & SHRINKER_MEMCG_AWARE))
> > + return count_nr_deferred_memcg(nid, shrinker,
> > + sc->memcg);
> > +
> > + return atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
> > +}
> > +
> > +
> > +static long set_nr_deferred(long nr, struct shrinker *shrinker,
> > + struct shrink_control *sc)
> > +{
> > + int nid = sc->nid;
> > +
> > + if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
> > + nid = 0;
> > +
> > + if (sc->memcg &&
> > + (shrinker->flags & SHRINKER_MEMCG_AWARE))
> > + return set_nr_deferred_memcg(nr, nid, shrinker,
> > + sc->memcg);
> > +
> > + return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
> > +}
> > +
> > /*
> > * This misses isolated pages which are not accounted for to save counters.
> > * As the data only determines if reclaim or compaction continues, it is
> > @@ -558,14 +624,10 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> > long freeable;
> > long nr;
> > long new_nr;
> > - int nid = shrinkctl->nid;
> > long batch_size = shrinker->batch ? shrinker->batch
> > : SHRINK_BATCH;
> > long scanned = 0, next_deferred;
> >
> > - if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
> > - nid = 0;
> > -
> > freeable = shrinker->count_objects(shrinker, shrinkctl);
> > if (freeable == 0 || freeable == SHRINK_EMPTY)
> > return freeable;
> > @@ -575,7 +637,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> > * and zero it so that other concurrent shrinker invocations
> > * don't also do this scanning work.
> > */
> > - nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
> > + nr = count_nr_deferred(shrinker, shrinkctl);
> >
> > total_scan = nr;
> > if (shrinker->seeks) {
> > @@ -666,14 +728,9 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
> > next_deferred = 0;
> > /*
> > * move the unused scan count back into the shrinker in a
> > - * manner that handles concurrent updates. If we exhausted the
> > - * scan, there is no need to do an update.
> > + * manner that handles concurrent updates.
> > */
> > - if (next_deferred > 0)
> > - new_nr = atomic_long_add_return(next_deferred,
> > - &shrinker->nr_deferred[nid]);
> > - else
> > - new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
> > + new_nr = set_nr_deferred(next_deferred, shrinker, shrinkctl);
> >
> > trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan);
> > return freed;
> > --
> > 2.26.2
> >