[PATCH v4 01/12] mm/vmstat: remove remote node draining

From: Marcelo Tosatti
Date: Sun Mar 05 2023 - 08:42:50 EST


Draining pages from the local pcp for a remote zone should not be
necessary, since once the system is low on memory (or compaction on a
zone is in effect), drain_all_pages() should be called, freeing any
unused pcp pages.

For reference, the original commit that introduced remote node
draining is 4037d452202e34214e8a939fa5621b2b3bbb45b7.

Acked-by: David Hildenbrand <david@xxxxxxxxxx>
Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

Index: linux-vmstat-remote/include/linux/mmzone.h
===================================================================
--- linux-vmstat-remote.orig/include/linux/mmzone.h
+++ linux-vmstat-remote/include/linux/mmzone.h
@@ -679,9 +679,6 @@ struct per_cpu_pages {
int high; /* high watermark, emptying needed */
int batch; /* chunk size for buddy add/remove */
short free_factor; /* batch scaling factor during free */
-#ifdef CONFIG_NUMA
- short expire; /* When 0, remote pagesets are drained */
-#endif

/* Lists of pages, one per migrate type stored on the pcp-lists */
struct list_head lists[NR_PCP_LISTS];
Index: linux-vmstat-remote/mm/vmstat.c
===================================================================
--- linux-vmstat-remote.orig/mm/vmstat.c
+++ linux-vmstat-remote/mm/vmstat.c
@@ -803,20 +803,16 @@ static int fold_diff(int *zone_diff, int
*
* The function returns the number of global counters updated.
*/
-static int refresh_cpu_vm_stats(bool do_pagesets)
+static int refresh_cpu_vm_stats(void)
{
struct pglist_data *pgdat;
struct zone *zone;
int i;
int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
- int changes = 0;

for_each_populated_zone(zone) {
struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
-#ifdef CONFIG_NUMA
- struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
-#endif

for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
int v;
@@ -826,44 +822,8 @@ static int refresh_cpu_vm_stats(bool do_

atomic_long_add(v, &zone->vm_stat[i]);
global_zone_diff[i] += v;
-#ifdef CONFIG_NUMA
- /* 3 seconds idle till flush */
- __this_cpu_write(pcp->expire, 3);
-#endif
}
}
-#ifdef CONFIG_NUMA
-
- if (do_pagesets) {
- cond_resched();
- /*
- * Deal with draining the remote pageset of this
- * processor
- *
- * Check if there are pages remaining in this pageset
- * if not then there is nothing to expire.
- */
- if (!__this_cpu_read(pcp->expire) ||
- !__this_cpu_read(pcp->count))
- continue;
-
- /*
- * We never drain zones local to this processor.
- */
- if (zone_to_nid(zone) == numa_node_id()) {
- __this_cpu_write(pcp->expire, 0);
- continue;
- }
-
- if (__this_cpu_dec_return(pcp->expire))
- continue;
-
- if (__this_cpu_read(pcp->count)) {
- drain_zone_pages(zone, this_cpu_ptr(pcp));
- changes++;
- }
- }
-#endif
}

for_each_online_pgdat(pgdat) {
@@ -880,8 +840,7 @@ static int refresh_cpu_vm_stats(bool do_
}
}

- changes += fold_diff(global_zone_diff, global_node_diff);
- return changes;
+ return fold_diff(global_zone_diff, global_node_diff);
}

/*
@@ -1867,7 +1826,7 @@ int sysctl_stat_interval __read_mostly =
#ifdef CONFIG_PROC_FS
static void refresh_vm_stats(struct work_struct *work)
{
- refresh_cpu_vm_stats(true);
+ refresh_cpu_vm_stats();
}

int vmstat_refresh(struct ctl_table *table, int write,
@@ -1877,6 +1836,8 @@ int vmstat_refresh(struct ctl_table *tab
int err;
int i;

+ drain_all_pages(NULL);
+
/*
* The regular update, every sysctl_stat_interval, may come later
* than expected: leaving a significant amount in per_cpu buckets.
@@ -1931,7 +1892,7 @@ int vmstat_refresh(struct ctl_table *tab

static void vmstat_update(struct work_struct *w)
{
- if (refresh_cpu_vm_stats(true)) {
+ if (refresh_cpu_vm_stats()) {
/*
* Counters were updated so we expect more updates
* to occur in the future. Keep on running the
@@ -1994,7 +1955,7 @@ void quiet_vmstat(void)
* it would be too expensive from this path.
* vmstat_shepherd will take care about that for us.
*/
- refresh_cpu_vm_stats(false);
+ refresh_cpu_vm_stats();
}

/*
Index: linux-vmstat-remote/mm/page_alloc.c
===================================================================
--- linux-vmstat-remote.orig/mm/page_alloc.c
+++ linux-vmstat-remote/mm/page_alloc.c
@@ -3176,26 +3176,6 @@ static int rmqueue_bulk(struct zone *zon
return allocated;
}

-#ifdef CONFIG_NUMA
-/*
- * Called from the vmstat counter updater to drain pagesets of this
- * currently executing processor on remote nodes after they have
- * expired.
- */
-void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
-{
- int to_drain, batch;
-
- batch = READ_ONCE(pcp->batch);
- to_drain = min(pcp->count, batch);
- if (to_drain > 0) {
- spin_lock(&pcp->lock);
- free_pcppages_bulk(zone, to_drain, pcp, 0);
- spin_unlock(&pcp->lock);
- }
-}
-#endif
-
/*
* Drain pcplists of the indicated processor and zone.
*/