[PATCH 3/6] mm/migrate: Add copy_pages_mthread function

From: Anshuman Khandual
Date: Thu Mar 09 2017 - 02:11:29 EST


From: Zi Yan <ziy@xxxxxxxxxx>

This change adds a new function, copy_pages_mthread(), to enable multi-threaded
page copy, which can be utilized during migration. The function splits the
page copy request into chunks and queues each chunk as a job on the
system_highpri_wq workqueue, so that every chunk is copied by its own worker.

Signed-off-by: Zi Yan <zi.yan@xxxxxxxxxxxxxx>
Signed-off-by: Anshuman Khandual <khandual@xxxxxxxxxxxxxxxxxx>
---
* Updated the cthreads calculation, taking care of divide-by-zero issues,
picking the right single thread, defining NR_COPYTHREADS, fixing
the build problem on i386, etc.

include/linux/highmem.h | 2 ++
mm/Makefile | 2 ++
mm/copy_pages_mthread.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 99 insertions(+)
create mode 100644 mm/copy_pages_mthread.c

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index bb3f329..e1f4f1b 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -236,6 +236,8 @@ static inline void copy_user_highpage(struct page *to, struct page *from,

#endif

+int copy_pages_mthread(struct page *to, struct page *from, int nr_pages);
+
static inline void copy_highpage(struct page *to, struct page *from)
{
char *vfrom, *vto;
diff --git a/mm/Makefile b/mm/Makefile
index 295bd7a..cc27e76 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -41,6 +41,8 @@ obj-y := filemap.o mempool.o oom_kill.o \

obj-y += init-mm.o

+obj-y += copy_pages_mthread.o
+
ifdef CONFIG_NO_BOOTMEM
obj-y += nobootmem.o
else
diff --git a/mm/copy_pages_mthread.c b/mm/copy_pages_mthread.c
new file mode 100644
index 0000000..5af861c
--- /dev/null
+++ b/mm/copy_pages_mthread.c
@@ -0,0 +1,95 @@
+/*
+ * This implements parallel page copy function through multi
+ * threaded work queues.
+ *
+ * Copyright (C) Zi Yan <ziy@xxxxxxxxxx>, Nov 2016
+ *
+ * Licensed under the terms of the GNU GPL, version 2.
+ */
+#include <linux/highmem.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/freezer.h>
+
+/*
+ * NR_COPYTHREADS is the upper bound on the number of copy threads used
+ * for a single copy request, on any architecture. It also sizes the
+ * on-stack CPU id list in copy_pages_mthread(). The actual number of
+ * copy threads is further limited by the cpumask weight of the target
+ * node.
+ */
+#define NR_COPYTHREADS 8
+
+struct copy_info { /* describes one chunk of a copy request, handled by one work item */
+ struct work_struct copy_work; /* work item queued on the workqueue; first member, so it shares its address with the copy_info */
+ char *to; /* destination address of this chunk */
+ char *from; /* source address of this chunk */
+ unsigned long chunk_size; /* number of bytes to copy for this chunk */
+};
+
+/*
+ * Copy @nbytes bytes from @src to @dst. This is a plain memcpy(), so
+ * the source and destination ranges must not overlap.
+ */
+static void copy_pages(char *dst, char *src, unsigned long nbytes)
+{
+	memcpy(dst, src, nbytes);
+}
+
+/*
+ * Workqueue callback: copy the chunk described by the copy_info that
+ * embeds @work.
+ *
+ * container_of() is used rather than a raw pointer cast so that the
+ * lookup stays correct regardless of where copy_work sits inside
+ * struct copy_info.
+ */
+static void copythread(struct work_struct *work)
+{
+	struct copy_info *info = container_of(work, struct copy_info, copy_work);
+
+	copy_pages(info->to, info->from, info->chunk_size);
+}
+
+/*
+ * copy_pages_mthread - copy a range of pages using several workers
+ * @to:       first destination page
+ * @from:     first source page
+ * @nr_pages: number of pages to copy
+ *
+ * Splits the PAGE_SIZE * @nr_pages byte range into up to NR_COPYTHREADS
+ * chunks, queues one work item per chunk on system_highpri_wq, each on a
+ * different CPU taken from the cpumask of the node @to belongs to, and
+ * waits for all of them to finish.
+ *
+ * Returns 0 on success or -ENOMEM if the work item array could not be
+ * allocated.
+ */
+int copy_pages_mthread(struct page *to, struct page *from, int nr_pages)
+{
+	const struct cpumask *cpumask;
+	struct copy_info *work_items;
+	char *vto, *vfrom;
+	unsigned long i, cthreads, chunk_size, total_size;
+	int cpu, node;
+	int cpu_id_list[NR_COPYTHREADS] = {0};
+
+	node = page_to_nid(to);
+	cpumask = cpumask_of_node(node);
+	cthreads = min_t(unsigned int, NR_COPYTHREADS, cpumask_weight(cpumask));
+	/* Round down to an even count, but always keep at least one worker. */
+	cthreads = (cthreads / 2) * 2;
+	if (!cthreads)
+		cthreads = 1;
+
+	work_items = kcalloc(cthreads, sizeof(*work_items), GFP_KERNEL);
+	if (!work_items)
+		return -ENOMEM;
+
+	/*
+	 * Pick the first cthreads CPUs of the target node.
+	 *
+	 * XXX: On a memory-only, CPU-less NUMA node the mask is empty, so
+	 * cpu_id_list[0] keeps its initial value and the copy falls back
+	 * to a single work item on CPU 0. On a single-CPU target node
+	 * that CPU is used for the page copy.
+	 */
+	i = 0;
+	for_each_cpu(cpu, cpumask) {
+		if (i >= cthreads)
+			break;
+		cpu_id_list[i] = cpu;
+		++i;
+	}
+
+	/*
+	 * NOTE(review): kmap() is only called on the first page of each
+	 * range, yet the workers address up to PAGE_SIZE * nr_pages bytes
+	 * from the returned pointers. This presumably relies on the pages
+	 * being contiguous in the kernel mapping (e.g. no highmem pages
+	 * when nr_pages > 1) - confirm against the callers.
+	 */
+	vfrom = kmap(from);
+	vto = kmap(to);
+	total_size = PAGE_SIZE * nr_pages;
+	chunk_size = total_size / cthreads;
+
+	for (i = 0; i < cthreads; ++i) {
+		unsigned long offset = i * chunk_size;
+
+		INIT_WORK(&work_items[i].copy_work, copythread);
+
+		work_items[i].to = vto + offset;
+		work_items[i].from = vfrom + offset;
+		/*
+		 * total_size need not be a multiple of cthreads (e.g. six
+		 * workers), so the last worker also copies the remainder
+		 * instead of silently leaving the tail bytes uncopied.
+		 */
+		if (i == cthreads - 1)
+			work_items[i].chunk_size = total_size - offset;
+		else
+			work_items[i].chunk_size = chunk_size;
+
+		queue_work_on(cpu_id_list[i], system_highpri_wq,
+			      &work_items[i].copy_work);
+	}
+
+	/* Wait for every chunk before unmapping and freeing the work items. */
+	for (i = 0; i < cthreads; ++i)
+		flush_work(&work_items[i].copy_work);
+
+	kunmap(to);
+	kunmap(from);
+	kfree(work_items);
+	return 0;
+}
--
2.1.4