Re: [RFC v1] memcg: add memcg lru for page reclaiming

From: Michal Hocko
Date: Mon Oct 21 2019 - 08:14:58 EST


On Mon 21-10-19 19:56:54, Hillf Danton wrote:
>
> Currently soft limit reclaim is frozen, see
> Documentation/admin-guide/cgroup-v2.rst for reasons.
>
> Copying the page lru idea, a memcg lru is added for selecting a victim
> memcg to reclaim pages from under memory pressure. It now works in
> parallel to soft limit reclaim (slr), not only because the latter needs
> some time to be reaped, but also because the coexistence makes it much
> easier to add the lru in a straightforward manner.

This doesn't explain what problem you would like to fix or what feature
you would like to achieve. It also doesn't explain the overall design.

> An lru list paired with a spin lock is added, along with a couple of
> helpers to add a memcg to the lru and to pick a victim from it. The
> current memcg high_work provides the other things it needs.
>
> V1 is based on 5.4-rc3.
>
> Changes since v0
> - add MEMCG_LRU in init/Kconfig
> - drop changes in mm/vmscan.c
> - make memcg lru work in parallel to slr
>
> Cc: Chris Down <chris@xxxxxxxxxxxxxx>
> Cc: Tejun Heo <tj@xxxxxxxxxx>
> Cc: Roman Gushchin <guro@xxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxxxx>
> Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
> Cc: Shakeel Butt <shakeelb@xxxxxxxxxx>
> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
> Cc: Minchan Kim <minchan@xxxxxxxxxx>
> Cc: Mel Gorman <mgorman@xxxxxxx>
> Signed-off-by: Hillf Danton <hdanton@xxxxxxxx>
> ---
>
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -843,6 +843,14 @@ config MEMCG
> help
> Provides control over the memory footprint of tasks in a cgroup.
>
> +config MEMCG_LRU
> + bool
> + depends on MEMCG
> + help
> + Select victim memcg on lru for page reclaiming.
> +
> + Say N if unsure.
> +
> config MEMCG_SWAP
> bool "Swap controller"
> depends on MEMCG && SWAP
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -223,6 +223,10 @@ struct mem_cgroup {
> /* Upper bound of normal memory consumption range */
> unsigned long high;
>
> +#ifdef CONFIG_MEMCG_LRU
> + struct list_head lru_node;
> +#endif
> +
> /* Range enforcement for interrupt charges */
> struct work_struct high_work;
>
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2338,14 +2338,54 @@ static int memcg_hotplug_cpu_dead(unsign
> return 0;
> }
>
> +#ifdef CONFIG_MEMCG_LRU
> +static DEFINE_SPINLOCK(memcg_lru_lock);
> +static LIST_HEAD(memcg_lru); /* a copy of page lru */
> +
> +static void memcg_add_lru(struct mem_cgroup *memcg)
> +{
> + spin_lock_irq(&memcg_lru_lock);
> + if (list_empty(&memcg->lru_node))
> + list_add_tail(&memcg->lru_node, &memcg_lru);
> + spin_unlock_irq(&memcg_lru_lock);
> +}
> +
> +static struct mem_cgroup *memcg_pick_lru(void)
> +{
> + struct mem_cgroup *memcg, *next;
> +
> + spin_lock_irq(&memcg_lru_lock);
> +
> + list_for_each_entry_safe(memcg, next, &memcg_lru, lru_node) {
> + list_del_init(&memcg->lru_node);
> +
> + if (page_counter_read(&memcg->memory) > memcg->high) {
> + spin_unlock_irq(&memcg_lru_lock);
> + return memcg;
> + }
> + }
> + spin_unlock_irq(&memcg_lru_lock);
> +
> + return NULL;
> +}
> +#endif
> +
> static void reclaim_high(struct mem_cgroup *memcg,
> unsigned int nr_pages,
> gfp_t gfp_mask)
> {
> +#ifdef CONFIG_MEMCG_LRU
> + struct mem_cgroup *start = memcg;
> +#endif
> do {
> if (page_counter_read(&memcg->memory) <= memcg->high)
> continue;
> memcg_memory_event(memcg, MEMCG_HIGH);
> + if (IS_ENABLED(CONFIG_MEMCG_LRU))
> + if (start != memcg) {
> + memcg_add_lru(memcg);
> + return;
> + }
> try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
> } while ((memcg = parent_mem_cgroup(memcg)));
> }
> @@ -3158,6 +3198,13 @@ unsigned long mem_cgroup_soft_limit_recl
> unsigned long excess;
> unsigned long nr_scanned;
>
> + if (IS_ENABLED(CONFIG_MEMCG_LRU)) {
> + struct mem_cgroup *memcg = memcg_pick_lru();
> + if (memcg)
> + schedule_work(&memcg->high_work);
> + return 0;
> + }
> +
> if (order > 0)
> return 0;
>
> @@ -5068,6 +5115,8 @@ static struct mem_cgroup *mem_cgroup_all
> if (memcg_wb_domain_init(memcg, GFP_KERNEL))
> goto fail;
>
> + if (IS_ENABLED(CONFIG_MEMCG_LRU))
> + INIT_LIST_HEAD(&memcg->lru_node);
> INIT_WORK(&memcg->high_work, high_work_func);
> memcg->last_scanned_node = MAX_NUMNODES;
> INIT_LIST_HEAD(&memcg->oom_notify);
> --
>

--
Michal Hocko
SUSE Labs