[PATCH v3 1/2] mm/memcg: try harder to decrease [memory,memsw].limit_in_bytes

From: Andrey Ryabinin
Date: Tue Jan 09 2018 - 11:58:34 EST


mem_cgroup_resize_[memsw]_limit() tries to free only 32 (SWAP_CLUSTER_MAX)
pages on each iteration. This makes it practically impossible to decrease
the limit of a memory cgroup. Tasks can easily allocate those 32 pages back,
so memory usage is never reduced, and once retry_count reaches zero we return
-EBUSY.

The problem is easy to reproduce by running the following commands:

mkdir /sys/fs/cgroup/memory/test
echo $$ >> /sys/fs/cgroup/memory/test/tasks
cat big_file > /dev/null &
sleep 1 && echo $((100*1024*1024)) > /sys/fs/cgroup/memory/test/memory.limit_in_bytes
-bash: echo: write error: Device or resource busy

Instead of relying on retry_count, keep retrying the reclaim until
the desired limit is reached, and fail only if the reclaim doesn't make
any progress (-EBUSY) or a signal is pending (-EINTR).

Signed-off-by: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx>
---

Changes since v2:
- Changelog wording per mhocko@

mm/memcontrol.c | 70 +++++++++++++--------------------------------------------
1 file changed, 16 insertions(+), 54 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f40b5ad3f959..0d26db9a665d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1176,20 +1176,6 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
}

/*
- * This function returns the number of memcg under hierarchy tree. Returns
- * 1(self count) if no children.
- */
-static int mem_cgroup_count_children(struct mem_cgroup *memcg)
-{
- int num = 0;
- struct mem_cgroup *iter;
-
- for_each_mem_cgroup_tree(iter, memcg)
- num++;
- return num;
-}
-
-/*
* Return the memory (and swap, if configured) limit for a memcg.
*/
unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
@@ -2462,22 +2448,10 @@ static DEFINE_MUTEX(memcg_limit_mutex);
static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
unsigned long limit)
{
- unsigned long curusage;
- unsigned long oldusage;
+ unsigned long usage;
bool enlarge = false;
- int retry_count;
int ret;

- /*
- * For keeping hierarchical_reclaim simple, how long we should retry
- * is depends on callers. We set our retry-count to be function
- * of # of children which we should visit in this loop.
- */
- retry_count = MEM_CGROUP_RECLAIM_RETRIES *
- mem_cgroup_count_children(memcg);
-
- oldusage = page_counter_read(&memcg->memory);
-
do {
if (signal_pending(current)) {
ret = -EINTR;
@@ -2498,15 +2472,13 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
if (!ret)
break;

- try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true);
-
- curusage = page_counter_read(&memcg->memory);
- /* Usage is reduced ? */
- if (curusage >= oldusage)
- retry_count--;
- else
- oldusage = curusage;
- } while (retry_count);
+ usage = page_counter_read(&memcg->memory);
+ if (!try_to_free_mem_cgroup_pages(memcg, usage - limit,
+ GFP_KERNEL, true)) {
+ ret = -EBUSY;
+ break;
+ }
+ } while (true);

if (!ret && enlarge)
memcg_oom_recover(memcg);
@@ -2517,18 +2489,10 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
unsigned long limit)
{
- unsigned long curusage;
- unsigned long oldusage;
+ unsigned long usage;
bool enlarge = false;
- int retry_count;
int ret;

- /* see mem_cgroup_resize_res_limit */
- retry_count = MEM_CGROUP_RECLAIM_RETRIES *
- mem_cgroup_count_children(memcg);
-
- oldusage = page_counter_read(&memcg->memsw);
-
do {
if (signal_pending(current)) {
ret = -EINTR;
@@ -2549,15 +2513,13 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
if (!ret)
break;

- try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, false);
-
- curusage = page_counter_read(&memcg->memsw);
- /* Usage is reduced ? */
- if (curusage >= oldusage)
- retry_count--;
- else
- oldusage = curusage;
- } while (retry_count);
+ usage = page_counter_read(&memcg->memsw);
+ if (!try_to_free_mem_cgroup_pages(memcg, usage - limit,
+ GFP_KERNEL, false)) {
+ ret = -EBUSY;
+ break;
+ }
+ } while (true);

if (!ret && enlarge)
memcg_oom_recover(memcg);
--
2.13.6