[PATCH v2] ksm: allow dedup all tasks memory

From: Timofey Titovets
Date: Sun Nov 11 2018 - 16:27:24 EST


From: Timofey Titovets <nefelim4ag@xxxxxxxxx>

ksm by default working only on memory that added by
madvice().

And only way get that work on other applications:
- Use LD_PRELOAD and libraries
- Patch kernel

Lets use kernel task list in ksm_scan_thread and add logic to allow ksm
import VMA from tasks.
That behaviour controlled by new attribute: mode
I try mimic hugepages attribute, so mode have two states:
- normal - old default behaviour
- always [new] - allow ksm to get tasks vma and try working on that.

To reduce CPU load & tasklist locking time,
ksm try import VMAs from one task per loop.

So add new attribute "mode"
Two passible values:
- normal [default] - ksm use only madvice
- always [new] - ksm will search vma over all processes memory and
add it to the dedup list

v1 -> v2:
- Rebase on v4.19.1

Signed-off-by: Timofey Titovets <nefelim4ag@xxxxxxxxx>
---
Documentation/admin-guide/mm/ksm.rst | 7 ++
mm/ksm.c | 149 ++++++++++++++++++++++-----
2 files changed, 128 insertions(+), 28 deletions(-)

diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst
index 9303786632d1..253f94a09be8 100644
--- a/Documentation/admin-guide/mm/ksm.rst
+++ b/Documentation/admin-guide/mm/ksm.rst
@@ -116,6 +116,13 @@ run
Default: 0 (must be changed to 1 to activate KSM, except if
CONFIG_SYSFS is disabled)

+mode
+ * set always to allow ksm deduplicate memory of every process
+ * set normal to use only madviced memory
+
+ Default: normal (dedupulicate only madviced memory as in
+ earlier releases)
+
use_zero_pages
specifies whether empty pages (i.e. allocated pages that only
contain zeroes) should be treated specially. When set to 1,
diff --git a/mm/ksm.c b/mm/ksm.c
index 1a088306ef81..5097d710c466 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -295,6 +295,10 @@ static int ksm_nr_node_ids = 1;
static unsigned long ksm_run = KSM_RUN_STOP;
static void wait_while_offlining(void);

+#define KSM_MODE_NORMAL 0
+#define KSM_MODE_ALWAYS 1
+static unsigned long ksm_mode = KSM_MODE_NORMAL;
+
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
static DEFINE_SPINLOCK(ksm_mmlist_lock);
@@ -303,6 +307,11 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock);
sizeof(struct __struct), __alignof__(struct __struct),\
(__flags), NULL)

+static inline int ksm_mode_always(void)
+{
+ return (ksm_mode == KSM_MODE_ALWAYS);
+}
+
static int __init ksm_slab_init(void)
{
rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
@@ -2386,17 +2395,94 @@ static void ksm_do_scan(unsigned int scan_npages)

static int ksmd_should_run(void)
{
- return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
+ return (ksm_run & KSM_RUN_MERGE) &&
+ (!list_empty(&ksm_mm_head.mm_list) || ksm_mode_always());
+}
+
+
+static int ksm_enter(struct mm_struct *mm, unsigned long *vm_flags)
+{
+ int err;
+
+ if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
+ VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+ VM_HUGETLB | VM_MIXEDMAP))
+ return 0;
+
+#ifdef VM_SAO
+ if (*vm_flags & VM_SAO)
+ return 0;
+#endif
+#ifdef VM_SPARC_ADI
+ if (*vm_flags & VM_SPARC_ADI)
+ return 0;
+#endif
+ if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
+ err = __ksm_enter(mm);
+ if (err)
+ return err;
+ }
+
+ *vm_flags |= VM_MERGEABLE;
+
+ return 0;
+}
+
+/*
+ * Register all vmas for all processes in the system with KSM.
+ * Note that every call to ksm_madvise, for a given vma, after the first
+ * does nothing but set flags.
+ */
+void ksm_import_task_vma(struct task_struct *task)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ int error;
+
+ mm = get_task_mm(task);
+ if (!mm)
+ return;
+ down_write(&mm->mmap_sem);
+ vma = mm->mmap;
+ while (vma) {
+ error = ksm_enter(vma->vm_mm, &vma->vm_flags);
+ vma = vma->vm_next;
+ }
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+ return;
}

static int ksm_scan_thread(void *nothing)
{
+ pid_t last_pid = 1;
+ pid_t curr_pid;
+ struct task_struct *task;
+
set_freezable();
set_user_nice(current, 5);

while (!kthread_should_stop()) {
mutex_lock(&ksm_thread_mutex);
wait_while_offlining();
+ if (ksm_mode_always()) {
+ /*
+ * import one task's vma per run
+ */
+ read_lock(&tasklist_lock);
+
+ for_each_process(task) {
+ curr_pid = task_pid_nr(task);
+ if (curr_pid == last_pid)
+ break;
+ }
+
+ task = next_task(task);
+ last_pid = task_pid_nr(task);
+
+ ksm_import_task_vma(task);
+ read_unlock(&tasklist_lock);
+ }
if (ksmd_should_run())
ksm_do_scan(ksm_thread_pages_to_scan);
mutex_unlock(&ksm_thread_mutex);
@@ -2422,33 +2508,9 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,

switch (advice) {
case MADV_MERGEABLE:
- /*
- * Be somewhat over-protective for now!
- */
- if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
- VM_PFNMAP | VM_IO | VM_DONTEXPAND |
- VM_HUGETLB | VM_MIXEDMAP))
- return 0; /* just ignore the advice */
-
- if (vma_is_dax(vma))
- return 0;
-
-#ifdef VM_SAO
- if (*vm_flags & VM_SAO)
- return 0;
-#endif
-#ifdef VM_SPARC_ADI
- if (*vm_flags & VM_SPARC_ADI)
- return 0;
-#endif
-
- if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
- err = __ksm_enter(mm);
- if (err)
- return err;
- }
-
- *vm_flags |= VM_MERGEABLE;
+ err = ksm_enter(mm, vm_flags);
+ if (err)
+ return err;
break;

case MADV_UNMERGEABLE:
@@ -2852,6 +2914,36 @@ static ssize_t pages_to_scan_store(struct kobject *kobj,
}
KSM_ATTR(pages_to_scan);

+static ssize_t mode_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ switch (ksm_mode) {
+ case KSM_MODE_NORMAL:
+ return sprintf(buf, "always [normal]\n");
+ break;
+ case KSM_MODE_ALWAYS:
+ return sprintf(buf, "[always] normal\n");
+ break;
+ }
+
+ return sprintf(buf, "always [normal]\n");
+}
+
+static ssize_t mode_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (!memcmp("always", buf, min(sizeof("always")-1, count))) {
+ ksm_mode = KSM_MODE_ALWAYS;
+ wake_up_interruptible(&ksm_thread_wait);
+ } else if (!memcmp("normal", buf, min(sizeof("normal")-1, count))) {
+ ksm_mode = KSM_MODE_NORMAL;
+ } else
+ return -EINVAL;
+
+ return count;
+}
+KSM_ATTR(mode);
+
static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
@@ -3109,6 +3201,7 @@ KSM_ATTR_RO(full_scans);
static struct attribute *ksm_attrs[] = {
&sleep_millisecs_attr.attr,
&pages_to_scan_attr.attr,
+ &mode_attr.attr,
&run_attr.attr,
&pages_shared_attr.attr,
&pages_sharing_attr.attr,
--
2.19.1