[patch 4/4] mm: vmstat_refresh: avoid queueing work item if cpu stats are clean

From: Marcelo Tosatti
Date: Fri Jul 30 2021 - 16:21:19 EST


It is not necessary to queue work item to run refresh_vm_stats
on a remote CPU if that CPU has no dirty stats and no per-CPU
allocations for remote nodes.

This fixes sosreport hang (which uses vmstat_refresh) with
spinning SCHED_FIFO process.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

Index: linux-2.6-vmstat-update/mm/vmstat.c
===================================================================
--- linux-2.6-vmstat-update.orig/mm/vmstat.c
+++ linux-2.6-vmstat-update/mm/vmstat.c
@@ -1826,17 +1826,42 @@ static bool need_update(int cpu)
}

#ifdef CONFIG_PROC_FS
-static void refresh_vm_stats(struct work_struct *work)
+static bool need_drain_remote_zones(int cpu)
+{
+#ifdef CONFIG_NUMA
+ struct zone *zone;
+
+ for_each_populated_zone(zone) {
+ struct per_cpu_pages *pcp;
+ pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+
+ if (!pcp->count)
+ continue;
+
+ if (!pcp->expire)
+ continue;
+
+ if (zone_to_nid(zone) == cpu_to_node(cpu))
+ continue;
+
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+static long refresh_vm_stats(void *arg)
{
refresh_cpu_vm_stats(true);
+ return 0;
}

int vmstat_refresh(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
long val;
- int err;
- int i;
+ int i, cpu;

/*
* The regular update, every sysctl_stat_interval, may come later
@@ -1850,9 +1875,15 @@ int vmstat_refresh(struct ctl_table *tab
* transiently negative values, report an error here if any of
* the stats is negative, so we know to go looking for imbalance.
*/
- err = schedule_on_each_cpu(refresh_vm_stats);
- if (err)
- return err;
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ if (need_update(cpu) || need_drain_remote_zones(cpu))
+ work_on_cpu(cpu, refresh_vm_stats, NULL);
+
+ cond_resched();
+ }
+ put_online_cpus();
+
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
/*
* Skip checking stats known to go negative occasionally.