Re: [PATCH 2/2] mm, memcg: Decouple e{low,min} state mutations from protection checks

From: Michal Hocko
Date: Wed Apr 29 2020 - 06:06:47 EST


On Tue 28-04-20 19:27:00, Chris Down wrote:
> mem_cgroup_protected currently is both used to set effective low and min
> and return a mem_cgroup_protection based on the result. As a user, this
> can be a little unexpected: it appears to be a simple predicate
> function, if not for the big warning in the comment above about the
> order in which it must be executed.
>
> This change makes it so that we separate the state mutations from the
> actual protection checks, which makes it more obvious where we need to
> be careful mutating internal state, and where we are simply checking and
> don't need to worry about that.
>
> Signed-off-by: Chris Down <chris@xxxxxxxxxxxxxx>
> Suggested-by: Johannes Weiner <hannes@xxxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxxxx>
> Cc: Roman Gushchin <guro@xxxxxx>
> Cc: Yafang Shao <laoar.shao@xxxxxxxxx>

Acked-by: Michal Hocko <mhocko@xxxxxxxx>

> ---
> include/linux/memcontrol.h | 48 +++++++++++++++++++++++++++++---------
> mm/memcontrol.c | 30 +++++++-----------------
> mm/vmscan.c | 17 ++++----------
> 3 files changed, 49 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index d630af1a4e17..88576b1235b0 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -50,12 +50,6 @@ enum memcg_memory_event {
> MEMCG_NR_MEMORY_EVENTS,
> };
>
> -enum mem_cgroup_protection {
> - MEMCG_PROT_NONE,
> - MEMCG_PROT_LOW,
> - MEMCG_PROT_MIN,
> -};
> -
> struct mem_cgroup_reclaim_cookie {
> pg_data_t *pgdat;
> unsigned int generation;
> @@ -357,8 +351,26 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
> READ_ONCE(memcg->memory.elow));
> }
>
> -enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
> - struct mem_cgroup *memcg);
> +void mem_cgroup_calculate_protection(struct mem_cgroup *root,
> + struct mem_cgroup *memcg);
> +
> +static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
> +{
> + if (mem_cgroup_disabled())
> + return false;
> +
> + return READ_ONCE(memcg->memory.elow) >=
> + page_counter_read(&memcg->memory);
> +}
> +
> +static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
> +{
> + if (mem_cgroup_disabled())
> + return false;
> +
> + return READ_ONCE(memcg->memory.emin) >=
> + page_counter_read(&memcg->memory);
> +}
>
> int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
> gfp_t gfp_mask, struct mem_cgroup **memcgp,
> @@ -838,13 +850,27 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
> static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
> bool in_low_reclaim)
> {
> +
> +
> +static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
> + struct mem_cgroup *memcg);
> +{
> +}
> +
> +static inline void mem_cgroup_protection(struct mem_cgroup *memcg,
> + bool in_low_reclaim)
> +{
> return 0;
> }
>
> -static inline enum mem_cgroup_protection mem_cgroup_protected(
> - struct mem_cgroup *root, struct mem_cgroup *memcg)
> +static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
> +{
> + return false;
> +}
> +
> +static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
> {
> - return MEMCG_PROT_NONE;
> + return false;
> }
>
> static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b0374be44e9e..317dbbaac603 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -6368,27 +6368,21 @@ static unsigned long effective_protection(unsigned long usage,
> }
>
> /**
> - * mem_cgroup_protected - check if memory consumption is in the normal range
> + * mem_cgroup_calculate_protection - calculate and cache effective low and min
> * @root: the top ancestor of the sub-tree being checked
> * @memcg: the memory cgroup to check
> *
> * WARNING: This function is not stateless! It can only be used as part
> * of a top-down tree iteration, not for isolated queries.
> - *
> - * Returns one of the following:
> - * MEMCG_PROT_NONE: cgroup memory is not protected
> - * MEMCG_PROT_LOW: cgroup memory is protected as long there is
> - * an unprotected supply of reclaimable memory from other cgroups.
> - * MEMCG_PROT_MIN: cgroup memory is protected
> */
> -enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
> - struct mem_cgroup *memcg)
> +void mem_cgroup_calculate_protection(struct mem_cgroup *root,
> + struct mem_cgroup *memcg)
> {
> unsigned long usage, parent_usage;
> struct mem_cgroup *parent;
>
> if (mem_cgroup_disabled())
> - return MEMCG_PROT_NONE;
> + return;
>
> if (!root)
> root = root_mem_cgroup;
> @@ -6403,22 +6397,22 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
> */
> WRITE_ONCE(memcg->memory.emin, 0);
> WRITE_ONCE(memcg->memory.elow, 0);
> - return MEMCG_PROT_NONE;
> + return;
> }
>
> usage = page_counter_read(&memcg->memory);
> if (!usage)
> - return MEMCG_PROT_NONE;
> + return;
>
> parent = parent_mem_cgroup(memcg);
> /* No parent means a non-hierarchical mode on v1 memcg */
> if (!parent)
> - return MEMCG_PROT_NONE;
> + return;
>
> if (parent == root) {
> memcg->memory.emin = READ_ONCE(memcg->memory.min);
> memcg->memory.elow = memcg->memory.low;
> - goto out;
> + return;
> }
>
> parent_usage = page_counter_read(&parent->memory);
> @@ -6431,14 +6425,6 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
> WRITE_ONCE(memcg->memory.elow, effective_protection(usage, parent_usage,
> memcg->memory.low, READ_ONCE(parent->memory.elow),
> atomic_long_read(&parent->memory.children_low_usage)));
> -
> -out:
> - if (usage <= memcg->memory.emin)
> - return MEMCG_PROT_MIN;
> - else if (usage <= memcg->memory.elow)
> - return MEMCG_PROT_LOW;
> - else
> - return MEMCG_PROT_NONE;
> }
>
> /**
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 72ac38eb8c29..e913c4652341 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2645,14 +2645,15 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
> unsigned long reclaimed;
> unsigned long scanned;
>
> - switch (mem_cgroup_protected(target_memcg, memcg)) {
> - case MEMCG_PROT_MIN:
> + mem_cgroup_calculate_protection(target_memcg, memcg);
> +
> + if (mem_cgroup_below_min(memcg)) {
> /*
> * Hard protection.
> * If there is no reclaimable memory, OOM.
> */
> continue;
> - case MEMCG_PROT_LOW:
> + } else if (mem_cgroup_below_low(memcg)) {
> /*
> * Soft protection.
> * Respect the protection only as long as
> @@ -2664,16 +2665,6 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
> continue;
> }
> memcg_memory_event(memcg, MEMCG_LOW);
> - break;
> - case MEMCG_PROT_NONE:
> - /*
> - * All protection thresholds breached. We may
> - * still choose to vary the scan pressure
> - * applied based on by how much the cgroup in
> - * question has exceeded its protection
> - * thresholds (see get_scan_count).
> - */
> - break;
> }
>
> reclaimed = sc->nr_reclaimed;
> --
> 2.26.2

--
Michal Hocko
SUSE Labs