[PATCH 2/5] mm: vmscan: make memcg slab shrink lockless
From: Qi Zheng
Date: Mon Feb 20 2023 - 04:17:17 EST
Like global slab shrink, since commit 1cd0bd06093c
("rcu: Remove CONFIG_SRCU"), it's time to use SRCU
to protect readers who previously held shrinker_rwsem.
We can test with the following script:
```
DIR="/root/shrinker/memcg/mnt"
do_create()
{
mkdir /sys/fs/cgroup/memory/test
echo 200M > /sys/fs/cgroup/memory/test/memory.limit_in_bytes
for i in `seq 0 $1`;
do
mkdir /sys/fs/cgroup/memory/test/$i;
echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs;
mkdir -p $DIR/$i;
done
}
do_mount()
{
for i in `seq $1 $2`;
do
mount -t tmpfs $i $DIR/$i;
done
}
do_touch()
{
for i in `seq $1 $2`;
do
echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs;
dd if=/dev/zero of=$DIR/$i/file$i bs=1M count=1 &
done
}
do_create 2000
do_mount 0 2000
do_touch 0 1000
```
Before applying:
46.60% [kernel] [k] down_read_trylock
18.70% [kernel] [k] up_read
15.44% [kernel] [k] shrink_slab
4.37% [kernel] [k] _find_next_bit
2.75% [kernel] [k] xa_load
2.07% [kernel] [k] idr_find
1.73% [kernel] [k] do_shrink_slab
1.42% [kernel] [k] shrink_lruvec
0.74% [kernel] [k] shrink_node
0.60% [kernel] [k] list_lru_count_one
After applying:
19.53% [kernel] [k] _find_next_bit
14.63% [kernel] [k] do_shrink_slab
14.58% [kernel] [k] shrink_slab
11.83% [kernel] [k] shrink_lruvec
9.33% [kernel] [k] __blk_flush_plug
6.67% [kernel] [k] mem_cgroup_iter
3.73% [kernel] [k] list_lru_count_one
2.43% [kernel] [k] shrink_node
1.96% [kernel] [k] super_cache_count
1.78% [kernel] [k] __rcu_read_unlock
1.38% [kernel] [k] __srcu_read_lock
1.30% [kernel] [k] xas_descend
We can see that the readers is no longer blocked.
Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
---
mm/vmscan.c | 56 ++++++++++++++++++++++++++++++-----------------------
1 file changed, 32 insertions(+), 24 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 95a3d6ddc6c1..dc47396ecd0e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -57,6 +57,7 @@
#include <linux/khugepaged.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
+#include <linux/srcu.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -221,8 +222,21 @@ static inline int shrinker_defer_size(int nr_items)
static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
int nid)
{
- return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
- lockdep_is_held(&shrinker_rwsem));
+ return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info,
+ &shrinker_srcu,
+ lockdep_is_held(&shrinker_rwsem));
+}
+
+static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
+ int nid)
+{
+ return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info,
+ &shrinker_srcu);
+}
+
+static void free_shrinker_info_rcu(struct rcu_head *head)
+{
+ kvfree(container_of(head, struct shrinker_info, rcu));
}
static int expand_one_shrinker_info(struct mem_cgroup *memcg,
@@ -257,7 +271,7 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
defer_size - old_defer_size);
rcu_assign_pointer(pn->shrinker_info, new);
- kvfree_rcu(old, rcu);
+ call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_rcu);
}
return 0;
@@ -350,13 +364,14 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
struct shrinker_info *info;
+ int srcu_idx;
- rcu_read_lock();
- info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+ srcu_idx = srcu_read_lock(&shrinker_srcu);
+ info = shrinker_info_srcu(memcg, nid);
/* Pairs with smp mb in shrink_slab() */
smp_mb__before_atomic();
set_bit(shrinker_id, info->map);
- rcu_read_unlock();
+ srcu_read_unlock(&shrinker_srcu, srcu_idx);
}
}
@@ -370,7 +385,6 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
return -ENOSYS;
down_write(&shrinker_rwsem);
- /* This may call shrinker, so it must use down_read_trylock() */
id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
if (id < 0)
goto unlock;
@@ -404,7 +418,7 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
{
struct shrinker_info *info;
- info = shrinker_info_protected(memcg, nid);
+ info = shrinker_info_srcu(memcg, nid);
return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
}
@@ -413,13 +427,13 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
{
struct shrinker_info *info;
- info = shrinker_info_protected(memcg, nid);
+ info = shrinker_info_srcu(memcg, nid);
return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
}
void reparent_shrinker_deferred(struct mem_cgroup *memcg)
{
- int i, nid;
+ int i, nid, srcu_idx;
long nr;
struct mem_cgroup *parent;
struct shrinker_info *child_info, *parent_info;
@@ -429,16 +443,16 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
parent = root_mem_cgroup;
/* Prevent from concurrent shrinker_info expand */
- down_read(&shrinker_rwsem);
+ srcu_idx = srcu_read_lock(&shrinker_srcu);
for_each_node(nid) {
- child_info = shrinker_info_protected(memcg, nid);
- parent_info = shrinker_info_protected(parent, nid);
+ child_info = shrinker_info_srcu(memcg, nid);
+ parent_info = shrinker_info_srcu(parent, nid);
for (i = 0; i < shrinker_nr_max; i++) {
nr = atomic_long_read(&child_info->nr_deferred[i]);
atomic_long_add(nr, &parent_info->nr_deferred[i]);
}
}
- up_read(&shrinker_rwsem);
+ srcu_read_unlock(&shrinker_srcu, srcu_idx);
}
static bool cgroup_reclaim(struct scan_control *sc)
@@ -891,15 +905,14 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
{
struct shrinker_info *info;
unsigned long ret, freed = 0;
+ int srcu_idx;
int i;
if (!mem_cgroup_online(memcg))
return 0;
- if (!down_read_trylock(&shrinker_rwsem))
- return 0;
-
- info = shrinker_info_protected(memcg, nid);
+ srcu_idx = srcu_read_lock(&shrinker_srcu);
+ info = shrinker_info_srcu(memcg, nid);
if (unlikely(!info))
goto unlock;
@@ -949,14 +962,9 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
set_shrinker_bit(memcg, nid, i);
}
freed += ret;
-
- if (rwsem_is_contended(&shrinker_rwsem)) {
- freed = freed ? : 1;
- break;
- }
}
unlock:
- up_read(&shrinker_rwsem);
+ srcu_read_unlock(&shrinker_srcu, srcu_idx);
return freed;
}
#else /* CONFIG_MEMCG */
--
2.20.1