[PATCH v3] mm: vmpressure: don't count proactive reclaim in vmpressure

From: Yosry Ahmed
Date: Thu Jun 30 2022 - 04:31:03 EST


vmpressure is used in cgroup v1 to notify userspace of reclaim
efficiency events, and is also used in both cgroup v1 and v2 as a
signal of memory pressure for networking; see
mem_cgroup_under_socket_pressure().
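
For context (not part of this patch), this is roughly how a cgroup v1
listener consumes those vmpressure events: an eventfd is armed against
memory.pressure_level through cgroup.event_control. A minimal sketch;
the "/sys/fs/cgroup/memory/job" path is a placeholder and error
handling is abbreviated:

  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/eventfd.h>
  #include <unistd.h>

  int main(void)
  {
          char buf[64];
          uint64_t count;
          int efd = eventfd(0, 0);
          int pfd = open("/sys/fs/cgroup/memory/job/memory.pressure_level",
                         O_RDONLY);
          int cfd = open("/sys/fs/cgroup/memory/job/cgroup.event_control",
                         O_WRONLY);

          if (efd < 0 || pfd < 0 || cfd < 0)
                  return 1;

          /* "<event_fd> <pressure_level_fd> <level>" arms the listener */
          snprintf(buf, sizeof(buf), "%d %d low", efd, pfd);
          if (write(cfd, buf, strlen(buf)) < 0)
                  return 1;

          /* each read reports vmpressure events signaled since last read */
          while (read(efd, &count, sizeof(count)) == sizeof(count))
                  printf("vmpressure events: %llu\n",
                         (unsigned long long)count);
          return 0;
  }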

Proactive reclaim is intended to probe memcgs for cold memory without
affecting their performance. Hence, reclaim caused by writing to
memory.reclaim should not trigger vmpressure.
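
As an illustration (again, not part of this patch), the proactive
reclaim this change exempts boils down to a write like the one below;
the "/sys/fs/cgroup/job" path and the amount are placeholders:

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
          const char *amount = "100M";    /* memparse()-style size */
          int fd = open("/sys/fs/cgroup/job/memory.reclaim", O_WRONLY);

          if (fd < 0)
                  return 1;
          /*
           * Returns once the requested amount has been reclaimed; fails
           * with EAGAIN if reclaim cannot make enough progress.
           */
          if (write(fd, amount, strlen(amount)) < 0)
                  perror("memory.reclaim");
          close(fd);
          return 0;
  }

With this change, the reclaim work behind such a write no longer feeds
vmpressure: it neither generates cgroup v1 pressure notifications nor
makes mem_cgroup_under_socket_pressure() report pressure, while reclaim
driven by real memory pressure still does.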

Signed-off-by: Yosry Ahmed <yosryahmed@xxxxxxxxxx>
---

Changes in v3:
- Limited the vmpressure change to memory.reclaim, dropped psi changes,
  updated changelog to reflect new behavior (Michal, Shakeel).

Changes in v2:
- Removed unnecessary initializations to zero (Andrew).
- Separate declarations and initializations when it causes line wrapping
  (Andrew).

---
include/linux/swap.h |  5 ++++-
mm/memcontrol.c      | 27 ++++++++++++++++++---------
mm/vmscan.c          | 27 +++++++++++++++++----------
3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0c0fed1b348f2..f6e9eaa2339fe 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -411,10 +411,13 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
extern unsigned long zone_reclaimable_pages(struct zone *zone);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
+
+#define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
+#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- bool may_swap);
+ unsigned int reclaim_options);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9f2731d62eef2..8dc29cbb5fbf7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2319,6 +2319,7 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
gfp_t gfp_mask)
{
unsigned long nr_reclaimed = 0;
+ unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;

do {
unsigned long pflags;
@@ -2331,7 +2332,8 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,

psi_memstall_enter(&pflags);
nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
- gfp_mask, true);
+ gfp_mask,
+ reclaim_options);
psi_memstall_leave(&pflags);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
@@ -2576,7 +2578,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
struct page_counter *counter;
unsigned long nr_reclaimed;
bool passed_oom = false;
- bool may_swap = true;
+ unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
bool drained = false;
unsigned long pflags;

@@ -2593,7 +2595,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
mem_over_limit = mem_cgroup_from_counter(counter, memory);
} else {
mem_over_limit = mem_cgroup_from_counter(counter, memsw);
- may_swap = false;
+ reclaim_options &= ~MEMCG_RECLAIM_MAY_SWAP;
}

if (batch > nr_pages) {
@@ -2620,7 +2622,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,

psi_memstall_enter(&pflags);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
- gfp_mask, may_swap);
+ gfp_mask, reclaim_options);
psi_memstall_leave(&pflags);

if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -3369,6 +3371,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
int ret;
bool limits_invariant;
struct page_counter *counter = memsw ? &memcg->memsw : &memcg->memory;
+ unsigned int reclaim_options = memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP;

do {
if (signal_pending(current)) {
@@ -3403,7 +3406,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
}

if (!try_to_free_mem_cgroup_pages(memcg, 1,
- GFP_KERNEL, !memsw)) {
+ GFP_KERNEL, reclaim_options)) {
ret = -EBUSY;
break;
}
@@ -3502,6 +3505,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
{
int nr_retries = MAX_RECLAIM_RETRIES;
+ unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;

/* we call try-to-free pages for make this cgroup empty */
lru_add_drain_all();
@@ -3513,7 +3517,8 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
if (signal_pending(current))
return -EINTR;

- if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true))
+ if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
+ reclaim_options))
nr_retries--;
}

@@ -6222,6 +6227,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
bool drained = false;
unsigned long high;
+ unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
int err;

buf = strstrip(buf);
@@ -6248,7 +6254,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
}

reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
- GFP_KERNEL, true);
+ GFP_KERNEL, reclaim_options);

if (!reclaimed && !nr_retries--)
break;
@@ -6271,6 +6277,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
unsigned int nr_reclaims = MAX_RECLAIM_RETRIES;
bool drained = false;
unsigned long max;
+ unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
int err;

buf = strstrip(buf);
@@ -6297,7 +6304,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,

if (nr_reclaims) {
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
- GFP_KERNEL, true))
+ GFP_KERNEL, reclaim_options))
nr_reclaims--;
continue;
}
@@ -6426,6 +6433,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
+ unsigned int reclaim_options;
int err;

buf = strstrip(buf);
@@ -6433,6 +6441,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
if (err)
return err;

+ reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
while (nr_reclaimed < nr_to_reclaim) {
unsigned long reclaimed;

@@ -6449,7 +6458,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,

reclaimed = try_to_free_mem_cgroup_pages(memcg,
nr_to_reclaim - nr_reclaimed,
- GFP_KERNEL, true);
+ GFP_KERNEL, reclaim_options);

if (!reclaimed && !nr_retries--)
return -EAGAIN;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f7d9a683e3a7d..0969e6408a539 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -102,6 +102,9 @@ struct scan_control {
/* Can pages be swapped as part of reclaim? */
unsigned int may_swap:1;

+ /* Proactive reclaim invoked by userspace through memory.reclaim */
+ unsigned int proactive:1;
+
/*
* Cgroup memory below memory.low is protected as long as we
* don't threaten to OOM. If any cgroup is reclaimed at
@@ -3125,9 +3128,10 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
sc->priority);

/* Record the group's reclaim efficiency */
- vmpressure(sc->gfp_mask, memcg, false,
- sc->nr_scanned - scanned,
- sc->nr_reclaimed - reclaimed);
+ if (!sc->proactive)
+ vmpressure(sc->gfp_mask, memcg, false,
+ sc->nr_scanned - scanned,
+ sc->nr_reclaimed - reclaimed);

} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
}
@@ -3250,9 +3254,10 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
}

/* Record the subtree's reclaim efficiency */
- vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
- sc->nr_scanned - nr_scanned,
- sc->nr_reclaimed - nr_reclaimed);
+ if (!sc->proactive)
+ vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+ sc->nr_scanned - nr_scanned,
+ sc->nr_reclaimed - nr_reclaimed);

if (sc->nr_reclaimed - nr_reclaimed)
reclaimable = true;
@@ -3534,8 +3539,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
__count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);

do {
- vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
- sc->priority);
+ if (!sc->proactive)
+ vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
+ sc->priority);
sc->nr_scanned = 0;
shrink_zones(zonelist, sc);

@@ -3825,7 +3831,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- bool may_swap)
+ unsigned int reclaim_options)
{
unsigned long nr_reclaimed;
unsigned int noreclaim_flag;
@@ -3838,7 +3844,8 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.priority = DEF_PRIORITY,
.may_writepage = !laptop_mode,
.may_unmap = 1,
- .may_swap = may_swap,
+ .may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
+ .proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
};
/*
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
--
2.37.0.rc0.161.g10f37bed90-goog