[55/73] mm: vmstat: use a single setter function and callback for adjusting percpu thresholds

From: Greg KH
Date: Fri Mar 04 2011 - 20:04:49 EST


2.6.37-stable review patch. If anyone has any objections, please let us know.

------------------

From: Mel Gorman <mel@xxxxxxxxx>

commit b44129b30652c8771db2265939bb8b463724043d upstream.

reduce_pgdat_percpu_threshold() and restore_pgdat_percpu_threshold() exist
to adjust the per-cpu vmstat thresholds while kswapd is awake to avoid
errors due to counter drift. The functions duplicate some code so this
patch replaces them with a single set_pgdat_percpu_threshold() that takes
a callback function to calculate the desired threshold as a parameter.

[akpm@xxxxxxxxxxxxxxxxxxxx: readability tweak]
[kosaki.motohiro@xxxxxxxxxxxxxx: set_pgdat_percpu_threshold(): don't use for_each_online_cpu]
Signed-off-by: Mel Gorman <mel@xxxxxxxxx>
Reviewed-by: Christoph Lameter <cl@xxxxxxxxx>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxx>

---
include/linux/vmstat.h | 10 ++++++----
mm/vmscan.c | 19 +++++++++++++++++--
mm/vmstat.c | 36 +++++++-----------------------------
3 files changed, 30 insertions(+), 35 deletions(-)

--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -254,8 +254,11 @@ extern void dec_zone_state(struct zone *
extern void __dec_zone_state(struct zone *, enum zone_stat_item);

void refresh_cpu_vm_stats(int);
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat);
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat);
+
+int calculate_pressure_threshold(struct zone *zone);
+int calculate_normal_threshold(struct zone *zone);
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+ int (*calculate_pressure)(struct zone *));
#else /* CONFIG_SMP */

/*
@@ -300,8 +303,7 @@ static inline void __dec_zone_page_state
#define dec_zone_page_state __dec_zone_page_state
#define mod_zone_page_state __mod_zone_page_state

-static inline void reduce_pgdat_percpu_threshold(pg_data_t *pgdat) { }
-static inline void restore_pgdat_percpu_threshold(pg_data_t *pgdat) { }
+#define set_pgdat_percpu_threshold(pgdat, callback) { }

static inline void refresh_cpu_vm_stats(int cpu) { }
#endif
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2448,9 +2448,24 @@ static int kswapd(void *p)
*/
if (!sleeping_prematurely(pgdat, order, remaining)) {
trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
- restore_pgdat_percpu_threshold(pgdat);
+
+ /*
+ * vmstat counters are not perfectly
+ * accurate and the estimated value
+ * for counters such as NR_FREE_PAGES
+ * can deviate from the true value by
+ * nr_online_cpus * threshold. To
+ * avoid the zone watermarks being
+ * breached while under pressure, we
+ * reduce the per-cpu vmstat threshold
+ * while kswapd is awake and restore
+ * them before going back to sleep.
+ */
+ set_pgdat_percpu_threshold(pgdat,
+ calculate_normal_threshold);
schedule();
- reduce_pgdat_percpu_threshold(pgdat);
+ set_pgdat_percpu_threshold(pgdat,
+ calculate_pressure_threshold);
} else {
if (remaining)
count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -83,7 +83,7 @@ EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

-static int calculate_pressure_threshold(struct zone *zone)
+int calculate_pressure_threshold(struct zone *zone)
{
int threshold;
int watermark_distance;
@@ -107,7 +107,7 @@ static int calculate_pressure_threshold(
return threshold;
}

-static int calculate_threshold(struct zone *zone)
+int calculate_normal_threshold(struct zone *zone)
{
int threshold;
int mem; /* memory in 128 MB units */
@@ -166,7 +166,7 @@ static void refresh_zone_stat_thresholds
for_each_populated_zone(zone) {
unsigned long max_drift, tolerate_drift;

- threshold = calculate_threshold(zone);
+ threshold = calculate_normal_threshold(zone);

for_each_online_cpu(cpu)
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
@@ -185,46 +185,24 @@ static void refresh_zone_stat_thresholds
}
}

-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat)
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+ int (*calculate_pressure)(struct zone *))
{
struct zone *zone;
int cpu;
int threshold;
int i;

- get_online_cpus();
for (i = 0; i < pgdat->nr_zones; i++) {
zone = &pgdat->node_zones[i];
if (!zone->percpu_drift_mark)
continue;

- threshold = calculate_pressure_threshold(zone);
- for_each_online_cpu(cpu)
- per_cpu_ptr(zone->pageset, cpu)->stat_threshold
- = threshold;
- }
- put_online_cpus();
-}
-
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat)
-{
- struct zone *zone;
- int cpu;
- int threshold;
- int i;
-
- get_online_cpus();
- for (i = 0; i < pgdat->nr_zones; i++) {
- zone = &pgdat->node_zones[i];
- if (!zone->percpu_drift_mark)
- continue;
-
- threshold = calculate_threshold(zone);
- for_each_online_cpu(cpu)
+ threshold = (*calculate_pressure)(zone);
+ for_each_possible_cpu(cpu)
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
= threshold;
}
- put_online_cpus();
}

/*


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/