[PATCH] mm: vmpressure: make vmpressure_window a tunable.

From: Martijn Coenen
Date: Wed Feb 03 2016 - 06:08:44 EST


The window size used for calculating vm pressure
events was previously fixed at 512 pages. The
window size has a big impact on the rate of notifications
sent off to userspace, in particular when using the
"low" level. On machines with a lot of memory, the
current value may be excessive.

On the other hand, making the window size depend on
machine size does not allow userspace to change the
notification rate based on the current state of the
system. For example, when a lot of memory is still
available, userspace may want to increase the window
since it's not interested in receiving notifications
for every 2MB scanned.

This patch makes vmpressure_window a sysctl tunable.

Signed-off-by: Martijn Coenen <maco@xxxxxxxxxx>
---
Documentation/sysctl/vm.txt | 15 +++++++++++++++
include/linux/vmpressure.h | 1 +
kernel/sysctl.c | 11 +++++++++++
mm/vmpressure.c | 5 ++---
4 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 89a887c..0fa4846 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -60,6 +60,7 @@ Currently, these files are in /proc/sys/vm:
- swappiness
- user_reserve_kbytes
- vfs_cache_pressure
+- vmpressure_window
- zone_reclaim_mode

==============================================================
@@ -805,6 +806,20 @@ ten times more freeable objects than there are.

==============================================================

+vmpressure_window
+
+The vmpressure algorithm calculates vm pressure by looking
+at the number of pages reclaimed vs the number of pages scanned.
+The vmpressure_window tunable specifies the minimum amount
+of pages that needs to be scanned before sending any vmpressure
+event. Setting a small window size can cause a lot of false
+positives; setting a large window size may delay notifications
+for too long.
+
+The default value is 512 pages.
+
+==============================================================
+
zone_reclaim_mode:

Zone_reclaim_mode allows someone to set more or less aggressive approaches to
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 3347cc3..b5341d0 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -29,6 +29,7 @@ struct vmpressure {
struct mem_cgroup;

#ifdef CONFIG_MEMCG
+extern unsigned long vmpressure_win;
extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
unsigned long scanned, unsigned long reclaimed);
extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 97715fd..64938ad 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -51,6 +51,7 @@
#include <linux/dnotify.h>
#include <linux/syscalls.h>
#include <linux/vmstat.h>
+#include <linux/vmpressure.h>
#include <linux/nfs_fs.h>
#include <linux/acpi.h>
#include <linux/reboot.h>
@@ -1590,6 +1591,16 @@ static struct ctl_table vm_table[] = {
.extra2 = (void *)&mmap_rnd_compat_bits_max,
},
#endif
+#ifdef CONFIG_MEMCG
+ {
+ .procname = "vmpressure_window",
+ .data = &vmpressure_win,
+ .maxlen = sizeof(vmpressure_win),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &one,
+ },
+#endif
{ }
};

diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 9a6c070..bda6af9 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -35,10 +35,9 @@
* As the vmscan reclaimer logic works with chunks which are multiple of
* SWAP_CLUSTER_MAX, it makes sense to use it for the window size as well.
*
- * TODO: Make the window size depend on machine size, as we do for vmstat
- * thresholds. Currently we set it to 512 pages (2MB for 4KB pages).
+ * The window size is a tunable sysctl.
*/
-static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16;
+unsigned long __read_mostly vmpressure_win = SWAP_CLUSTER_MAX * 16;

/*
* These thresholds are used when we account memory pressure through
--
2.7.0.rc3.207.g0ac5344