Re: [PATCH v3 3/9] mm/workingset: extend the workingset detection for anon LRU

From: Johannes Weiner
Date: Wed Mar 18 2020 - 14:06:45 EST


On Tue, Mar 17, 2020 at 02:41:51PM +0900, js1304@xxxxxxxxx wrote:
> From: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
>
> In the following patch, workingset detection will be applied to
> anonymous LRU. To prepare it, this patch adds some code to
> distinguish/handle the both LRUs.
>
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>

This looks all correct to me, but I would ask for some style
fixes. With those applied, please feel free to add

Acked-by: Johannes Weiner <hannes@xxxxxxxxxxx>

> @@ -304,10 +308,10 @@ enum lruvec_flags {
> struct lruvec {
> struct list_head lists[NR_LRU_LISTS];
> struct zone_reclaim_stat reclaim_stat;
> - /* Evictions & activations on the inactive file list */
> - atomic_long_t inactive_age;
> + /* Evictions & activations on the inactive list */
> + atomic_long_t inactive_age[2];

Can you please add anon=0, file=1 to the comment?

> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1431,10 +1431,14 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
> seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT),
> memcg_events(memcg, PGMAJFAULT));
>
> - seq_buf_printf(&s, "workingset_refault %lu\n",
> - memcg_page_state(memcg, WORKINGSET_REFAULT));
> - seq_buf_printf(&s, "workingset_activate %lu\n",
> - memcg_page_state(memcg, WORKINGSET_ACTIVATE));
> + seq_buf_printf(&s, "workingset_refault_anon %lu\n",
> + memcg_page_state(memcg, WORKINGSET_REFAULT_ANON));
> + seq_buf_printf(&s, "workingset_refault_file %lu\n",
> + memcg_page_state(memcg, WORKINGSET_REFAULT_FILE));
> + seq_buf_printf(&s, "workingset_activate_anon %lu\n",
> + memcg_page_state(memcg, WORKINGSET_ACTIVATE_ANON));
> + seq_buf_printf(&s, "workingset_activate_file %lu\n",
> + memcg_page_state(memcg, WORKINGSET_ACTIVATE_FILE));
> seq_buf_printf(&s, "workingset_nodereclaim %lu\n",
> memcg_page_state(memcg, WORKINGSET_NODERECLAIM));
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index c932141..0493c25 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2716,7 +2716,10 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
> if (!sc->force_deactivate) {
> unsigned long refaults;
>
> - if (inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
> + refaults = lruvec_page_state(target_lruvec,
> + WORKINGSET_ACTIVATE_ANON);
> + if (refaults != target_lruvec->refaults[0] ||
> + inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
> sc->may_deactivate |= DEACTIVATE_ANON;
> else
> sc->may_deactivate &= ~DEACTIVATE_ANON;
> @@ -2727,8 +2730,8 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
> * rid of any stale active pages quickly.
> */
> refaults = lruvec_page_state(target_lruvec,
> - WORKINGSET_ACTIVATE);
> - if (refaults != target_lruvec->refaults ||
> + WORKINGSET_ACTIVATE_FILE);
> + if (refaults != target_lruvec->refaults[1] ||
> inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
> sc->may_deactivate |= DEACTIVATE_FILE;
> else
> @@ -3007,8 +3010,10 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat)
> unsigned long refaults;
>
> target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
> - refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE);
> - target_lruvec->refaults = refaults;
> + refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON);
> + target_lruvec->refaults[0] = refaults;
> + refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_FILE);
> + target_lruvec->refaults[1] = refaults;
> }
>
> /*
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 78d5337..3cdf8e9 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1146,8 +1146,10 @@ const char * const vmstat_text[] = {
> "nr_isolated_anon",
> "nr_isolated_file",
> "workingset_nodes",
> - "workingset_refault",
> - "workingset_activate",
> + "workingset_refault_anon",
> + "workingset_refault_file",
> + "workingset_activate_anon",
> + "workingset_activate_file",
> "workingset_restore",
> "workingset_nodereclaim",
> "nr_anon_pages",
> diff --git a/mm/workingset.c b/mm/workingset.c
> index 474186b..5fb8f85 100644
> --- a/mm/workingset.c
> +++ b/mm/workingset.c
> @@ -15,6 +15,7 @@
> #include <linux/dax.h>
> #include <linux/fs.h>
> #include <linux/mm.h>
> +#include <linux/mm_inline.h>

Please keep the line-length sorting of the includes intact. If that's
not possible, please separate the mm_inline.h include with an empty
line into its own section.

This makes it much easier to scan for specific headers, helps avoid
duplicate includes, and is generally just easier on the eyes.

> @@ -156,7 +157,7 @@
> *
> * Implementation
> *
> - * For each node's file LRU lists, a counter for inactive evictions
> + * For each node's anon/file LRU lists, a counter for inactive evictions
> * and activations is maintained (node->inactive_age).
> *
> * On eviction, a snapshot of this counter (along with some bits to
> @@ -213,7 +214,8 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
> *workingsetp = workingset;
> }
>
> -static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
> +static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat,
> + int is_file)

bool file, please.

> {
> /*
> * Reclaiming a cgroup means reclaiming all its children in a
> @@ -230,7 +232,7 @@ static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
> struct lruvec *lruvec;
>
> lruvec = mem_cgroup_lruvec(memcg, pgdat);
> - atomic_long_inc(&lruvec->inactive_age);
> + atomic_long_inc(&lruvec->inactive_age[is_file]);
> } while (memcg && (memcg = parent_mem_cgroup(memcg)));
> }
>
> @@ -248,18 +250,19 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
> unsigned long eviction;
> struct lruvec *lruvec;
> int memcgid;
> + int is_file = page_is_file_cache(page);

Please keep the line-length ordering intact here as well.

> /* Page is fully exclusive and pins page->mem_cgroup */
> VM_BUG_ON_PAGE(PageLRU(page), page);
> VM_BUG_ON_PAGE(page_count(page), page);
> VM_BUG_ON_PAGE(!PageLocked(page), page);
>
> - advance_inactive_age(page_memcg(page), pgdat);
> + advance_inactive_age(page_memcg(page), pgdat, is_file);
>
> lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
> /* XXX: target_memcg can be NULL, go through lruvec */
> memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
> - eviction = atomic_long_read(&lruvec->inactive_age);
> + eviction = atomic_long_read(&lruvec->inactive_age[is_file]);
> return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
> }
>
> @@ -278,13 +281,16 @@ void workingset_refault(struct page *page, void *shadow)
> struct lruvec *eviction_lruvec;
> unsigned long refault_distance;
> struct pglist_data *pgdat;
> - unsigned long active_file;
> + unsigned long active;
> struct mem_cgroup *memcg;
> unsigned long eviction;
> struct lruvec *lruvec;
> unsigned long refault;
> bool workingset;
> int memcgid;
> + int is_file = page_is_file_cache(page);
> + enum lru_list active_lru = page_lru_base_type(page) + LRU_ACTIVE;
> + enum node_stat_item workingset_stat;

Please use bool file and keep line-length ordering.

The workingset_stat variable doesn't seem helpful, see below:

> unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
>
> @@ -309,8 +315,8 @@ void workingset_refault(struct page *page, void *shadow)
> if (!mem_cgroup_disabled() && !eviction_memcg)
> goto out;
> eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
> - refault = atomic_long_read(&eviction_lruvec->inactive_age);
> - active_file = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE);
> + refault = atomic_long_read(&eviction_lruvec->inactive_age[is_file]);
> + active = lruvec_page_state(eviction_lruvec, active_lru);
>
> /*
> * Calculate the refault distance
> @@ -341,19 +347,21 @@ void workingset_refault(struct page *page, void *shadow)
> memcg = page_memcg(page);
> lruvec = mem_cgroup_lruvec(memcg, pgdat);
>
> - inc_lruvec_state(lruvec, WORKINGSET_REFAULT);
> + workingset_stat = WORKINGSET_REFAULT_BASE + is_file;
> + inc_lruvec_state(lruvec, workingset_stat);

inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file);

> /*
> * Compare the distance to the existing workingset size. We
> * don't act on pages that couldn't stay resident even if all
> * the memory was available to the page cache.
> */
> - if (refault_distance > active_file)
> + if (refault_distance > active)
> goto out;
>
> SetPageActive(page);
> - advance_inactive_age(memcg, pgdat);
> - inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
> + advance_inactive_age(memcg, pgdat, is_file);
> + workingset_stat = WORKINGSET_ACTIVATE_BASE + is_file;
> + inc_lruvec_state(lruvec, workingset_stat);

inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file);

> /* Page was active prior to eviction */
> if (workingset) {
> @@ -371,6 +379,7 @@ void workingset_refault(struct page *page, void *shadow)
> void workingset_activation(struct page *page)
> {
> struct mem_cgroup *memcg;
> + int is_file = page_is_file_cache(page);

bool file & length-ordering