[patch 5/5] mm: vmstat_refresh: avoid queueing work item if cpu stats are clean

From: Marcelo Tosatti
Date: Wed Jul 14 2021 - 16:43:56 EST


It is not necessary to queue work item to run refresh_vm_stats
on a remote CPU if that CPU has no dirty stats and no per-CPU
allocations for remote nodes.

This fixes sosreport hang (which uses vmstat_refresh) with
spinning SCHED_FIFO process.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

Index: linux-2.6-vmstat-update/mm/vmstat.c
===================================================================
--- linux-2.6-vmstat-update.orig/mm/vmstat.c
+++ linux-2.6-vmstat-update/mm/vmstat.c
@@ -1888,17 +1888,41 @@ static bool need_update(int cpu)
}

#ifdef CONFIG_PROC_FS
-static void refresh_vm_stats(struct work_struct *work)
+static bool need_drain_remote_zones(int cpu)
+{
+ struct zone *zone;
+
+ for_each_populated_zone(zone) {
+ struct per_cpu_pageset *p;
+
+ p = per_cpu_ptr(zone->pageset, cpu);
+
+ if (!p->pcp.count)
+ continue;
+#ifdef CONFIG_NUMA
+ if (!p->expire)
+ continue;
+#endif
+ if (zone_to_nid(zone) == cpu_to_node(cpu))
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+static long refresh_vm_stats(void *arg)
{
refresh_cpu_vm_stats(true);
+ return 0;
}

int vmstat_refresh(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
long val;
- int err;
- int i;
+ int i, cpu;

/*
* The regular update, every sysctl_stat_interval, may come later
@@ -1912,9 +1936,15 @@ int vmstat_refresh(struct ctl_table *tab
* transiently negative values, report an error here if any of
* the stats is negative, so we know to go looking for imbalance.
*/
- err = schedule_on_each_cpu(refresh_vm_stats);
- if (err)
- return err;
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ if (need_update(cpu) || need_drain_remote_zones(cpu))
+ work_on_cpu(cpu, refresh_vm_stats, NULL);
+
+ cond_resched();
+ }
+ put_online_cpus();
+
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
/*
* Skip checking stats known to go negative occasionally.