Re: [PATCH] mm: vmscan: Replace shrinker_rwsem trylocks with SRCU protection

From: Michal Hocko
Date: Mon Sep 27 2021 - 04:36:41 EST


On Mon 27-09-21 00:48:23, Sultan Alsawaf wrote:
> From: Sultan Alsawaf <sultan@xxxxxxxxxxxxxxx>
>
> The shrinker rwsem is problematic because the actual shrinking path must
> back off when contention appears, causing some or all shrinkers to be
> skipped. This can be especially bad when shrinkers are frequently
> registered and unregistered. A high frequency of shrinker registrations/
> unregistrations can effectively DoS the shrinker mechanism, rendering it
> useless.

Can you be more specific about those scenarios, please?

> Using SRCU to protect iteration through the shrinker list and idr
> eliminates this issue entirely. Now, shrinking can happen concurrently
> with shrinker registrations/unregistrations.

I have a vague recollection that this approach has been proposed in the
past. Have you checked previous attempts?

> Signed-off-by: Sultan Alsawaf <sultan@xxxxxxxxxxxxxxx>
> ---
> mm/vmscan.c | 68 ++++++++++++++++++++++++++++-------------------------
> 1 file changed, 36 insertions(+), 32 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 74296c2d1fed..3dea927be5ad 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -190,6 +190,7 @@ static void set_task_reclaim_state(struct task_struct *task,
>
> static LIST_HEAD(shrinker_list);
> static DECLARE_RWSEM(shrinker_rwsem);
> +DEFINE_STATIC_SRCU(shrinker_srcu);
>
> #ifdef CONFIG_MEMCG
> static int shrinker_nr_max;
> @@ -212,6 +213,20 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
> lockdep_is_held(&shrinker_rwsem));
> }
>
> +static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
> + int nid)
> +{
> + return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info,
> + &shrinker_srcu);
> +}
> +
> +static void free_shrinker_info_srcu(struct rcu_head *head)
> +{
> + struct shrinker_info *info = container_of(head, typeof(*info), rcu);
> +
> + kvfree_rcu(info, rcu);
> +}
> +
> static int expand_one_shrinker_info(struct mem_cgroup *memcg,
> int map_size, int defer_size,
> int old_map_size, int old_defer_size)
> @@ -244,7 +259,12 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
> defer_size - old_defer_size);
>
> rcu_assign_pointer(pn->shrinker_info, new);
> - kvfree_rcu(old, rcu);
> +
> + /*
> + * Shrinker info is used under both SRCU and regular RCU, so
> + * synchronize the free against both of them.
> + */
> + call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_srcu);
> }
>
> return 0;
> @@ -357,7 +377,6 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
> return -ENOSYS;
>
> down_write(&shrinker_rwsem);
> - /* This may call shrinker, so it must use down_read_trylock() */
> id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
> if (id < 0)
> goto unlock;
> @@ -391,7 +410,7 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
> {
> struct shrinker_info *info;
>
> - info = shrinker_info_protected(memcg, nid);
> + info = shrinker_info_srcu(memcg, nid);
> return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
> }
>
> @@ -400,7 +419,7 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
> {
> struct shrinker_info *info;
>
> - info = shrinker_info_protected(memcg, nid);
> + info = shrinker_info_srcu(memcg, nid);
> return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
> }
>
> @@ -641,6 +660,7 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
> down_write(&shrinker_rwsem);
> unregister_memcg_shrinker(shrinker);
> up_write(&shrinker_rwsem);
> + synchronize_srcu(&shrinker_srcu);
> return;
> }
>
> @@ -651,7 +671,7 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
> void register_shrinker_prepared(struct shrinker *shrinker)
> {
> down_write(&shrinker_rwsem);
> - list_add_tail(&shrinker->list, &shrinker_list);
> + list_add_tail_rcu(&shrinker->list, &shrinker_list);
> shrinker->flags |= SHRINKER_REGISTERED;
> up_write(&shrinker_rwsem);
> }
> @@ -676,11 +696,12 @@ void unregister_shrinker(struct shrinker *shrinker)
> return;
>
> down_write(&shrinker_rwsem);
> - list_del(&shrinker->list);
> + list_del_rcu(&shrinker->list);
> shrinker->flags &= ~SHRINKER_REGISTERED;
> if (shrinker->flags & SHRINKER_MEMCG_AWARE)
> unregister_memcg_shrinker(shrinker);
> up_write(&shrinker_rwsem);
> + synchronize_srcu(&shrinker_srcu);
>
> kfree(shrinker->nr_deferred);
> shrinker->nr_deferred = NULL;
> @@ -792,15 +813,13 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
> {
> struct shrinker_info *info;
> unsigned long ret, freed = 0;
> - int i;
> + int i, srcu_idx;
>
> if (!mem_cgroup_online(memcg))
> return 0;
>
> - if (!down_read_trylock(&shrinker_rwsem))
> - return 0;
> -
> - info = shrinker_info_protected(memcg, nid);
> + srcu_idx = srcu_read_lock(&shrinker_srcu);
> + info = shrinker_info_srcu(memcg, nid);
> if (unlikely(!info))
> goto unlock;
>
> @@ -850,14 +869,9 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
> set_shrinker_bit(memcg, nid, i);
> }
> freed += ret;
> -
> - if (rwsem_is_contended(&shrinker_rwsem)) {
> - freed = freed ? : 1;
> - break;
> - }
> }
> unlock:
> - up_read(&shrinker_rwsem);
> + srcu_read_unlock(&shrinker_srcu, srcu_idx);
> return freed;
> }
> #else /* CONFIG_MEMCG */
> @@ -894,6 +908,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
> {
> unsigned long ret, freed = 0;
> struct shrinker *shrinker;
> + int srcu_idx;
>
> /*
> * The root memcg might be allocated even though memcg is disabled
> @@ -905,10 +920,9 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
> if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
> return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
>
> - if (!down_read_trylock(&shrinker_rwsem))
> - goto out;
> -
> - list_for_each_entry(shrinker, &shrinker_list, list) {
> + srcu_idx = srcu_read_lock(&shrinker_srcu);
> + list_for_each_entry_srcu(shrinker, &shrinker_list, list,
> + srcu_read_lock_held(&shrinker_srcu)) {
> struct shrink_control sc = {
> .gfp_mask = gfp_mask,
> .nid = nid,
> @@ -919,19 +933,9 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
> if (ret == SHRINK_EMPTY)
> ret = 0;
> freed += ret;
> - /*
> - * Bail out if someone want to register a new shrinker to
> - * prevent the registration from being stalled for long periods
> - * by parallel ongoing shrinking.
> - */
> - if (rwsem_is_contended(&shrinker_rwsem)) {
> - freed = freed ? : 1;
> - break;
> - }
> }
> + srcu_read_unlock(&shrinker_srcu, srcu_idx);
>
> - up_read(&shrinker_rwsem);
> -out:
> cond_resched();
> return freed;
> }
> --
> 2.33.0

--
Michal Hocko
SUSE Labs