[RFC PATCH V0 01/10] mm: Add kmmscand kernel daemon

From: Raghavendra K T
Date: Sun Dec 01 2024 - 10:39:14 EST


Add a skeleton to support scanning and migration.
Also add a config option for the same.

High level design:

While (1):
scan the slowtier pages belonging to VMAs of a task.
Add to migration list
migrate scanned pages to node 0 (default)

The overall code is heavily influenced by khugepaged design.

Signed-off-by: Raghavendra K T <raghavendra.kt@xxxxxxx>
---
mm/Kconfig | 7 ++
mm/Makefile | 1 +
mm/kmmscand.c | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 190 insertions(+)
create mode 100644 mm/kmmscand.c

diff --git a/mm/Kconfig b/mm/Kconfig
index 84000b016808..a0b5ab6a9b67 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -740,6 +740,13 @@ config KSM
until a program has madvised that an area is MADV_MERGEABLE, and
root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).

+config KMMSCAND
+ bool "Enable PTE A bit scanning and Migration"
+ depends on NUMA_BALANCING
+ help
+ Enable scanning of the PTE Accessed (A) bit of pages. CXL pages that
+ were accessed are migrated to a regular NUMA node (node 0 by default).
+
config DEFAULT_MMAP_MIN_ADDR
int "Low address space to protect from user allocation"
depends on MMU
diff --git a/mm/Makefile b/mm/Makefile
index dba52bb0da8a..1b6b00e39d12 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_FAIL_PAGE_ALLOC) += fail_page_alloc.o
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_NUMA) += memory-tiers.o
+obj-$(CONFIG_KMMSCAND) += kmmscand.o
obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
diff --git a/mm/kmmscand.c b/mm/kmmscand.c
new file mode 100644
index 000000000000..23cf5638fe10
--- /dev/null
+++ b/mm/kmmscand.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+#include <linux/mm_inline.h>
+#include <linux/kthread.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/cleanup.h>
+
+#include <asm/pgalloc.h>
+#include "internal.h"
+
+
+/* The kmmscand kernel thread; NULL while the daemon is not running. */
+static struct task_struct *kmmscand_thread __read_mostly;
+/* Held by start_kmmscand()/stop_kmmscand() while they touch kmmscand_thread. */
+static DEFINE_MUTEX(kmmscand_mutex);
+
+/* How long to pause between two scan and migration cycles (milliseconds) */
+static unsigned int kmmscand_scan_sleep_ms __read_mostly = 16;
+
+/* Max number of mms to scan in one scan and migration cycle */
+#define KMMSCAND_MMS_TO_SCAN	(4 * 1024UL)
+static unsigned long kmmscand_mms_to_scan __read_mostly = KMMSCAND_MMS_TO_SCAN;
+
+/*
+ * Scanning on/off switch. Every reader in this file goes through READ_ONCE(),
+ * so the variable itself does not need to be declared volatile.
+ */
+bool kmmscand_scan_enabled = true;
+/* When set, kmmscand_should_wakeup() ends the sleep early and clears it. */
+static bool need_wakeup;
+
+/* Jiffies value at which the current inter-cycle sleep expires. */
+static unsigned long kmmscand_sleep_expire;
+
+/* Wait queue the daemon sleeps on between cycles. */
+static DECLARE_WAIT_QUEUE_HEAD(kmmscand_wait);
+
+/* Scan state: mm_head is the list whose non-emptiness signals pending work. */
+struct kmmscand_scan {
+	struct list_head mm_head;
+};
+
+struct kmmscand_scan kmmscand_scan = {
+	.mm_head = LIST_HEAD_INIT(kmmscand_scan.mm_head),
+};
+
+/* Report whether the scan list holds anything: 1 if it does, 0 if it is empty. */
+static int kmmscand_has_work(void)
+{
+	if (list_empty(&kmmscand_scan.mm_head))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Wait condition for the inter-cycle sleep.
+ *
+ * True when the thread has been asked to stop, when need_wakeup is set, or
+ * when jiffies has reached kmmscand_sleep_expire. A pending need_wakeup
+ * request is consumed (reset to false) before returning.
+ */
+static bool kmmscand_should_wakeup(void)
+{
+	bool wakeup = true;
+
+	/* Only consult the deadline when nothing else already demands a wake-up */
+	if (!kthread_should_stop() && !need_wakeup)
+		wakeup = time_after_eq(jiffies, kmmscand_sleep_expire);
+
+	if (need_wakeup)
+		need_wakeup = false;
+
+	return wakeup;
+}
+
+/*
+ * Sleep between two scan and migration cycles.
+ *
+ * The pause is taken whether or not the scan list has entries. Returning
+ * immediately when the list is empty would let the loop in kmmscand() call
+ * straight back into kmmscand_do_scan() and spin without ever sleeping.
+ *
+ * The sleep lasts kmmscand_scan_sleep_ms and ends early once
+ * kmmscand_should_wakeup() reports true after a wake-up on kmmscand_wait.
+ * A sleep time that converts to zero jiffies means "do not pause".
+ */
+static void kmmscand_wait_work(void)
+{
+	const unsigned long scan_sleep_jiffies =
+		msecs_to_jiffies(kmmscand_scan_sleep_ms);
+
+	if (!scan_sleep_jiffies)
+		return;
+
+	kmmscand_sleep_expire = jiffies + scan_sleep_jiffies;
+	wait_event_timeout(kmmscand_wait,
+			   kmmscand_should_wakeup(),
+			   scan_sleep_jiffies);
+}
+
+/* Migration step of a cycle; the body is still empty in this skeleton. */
+static void kmmscand_migrate_folio(void)
+{
+	/* nothing to migrate yet */
+}
+
+/*
+ * Scanning step of a cycle. For now it only sleeps for 100 ms in place of
+ * real scanning work and always returns 0.
+ */
+static unsigned long kmmscand_scan_mm_slot(void)
+{
+	unsigned long ret = 0;
+
+	/* placeholder for scanning */
+	msleep(100);
+
+	return ret;
+}
+
+/*
+ * Run one scan and migration cycle.
+ *
+ * Performs up to kmmscand_mms_to_scan rounds (always at least one). Each
+ * round scans only if the scan list is non-empty and then calls the migration
+ * step. The cycle ends early if the thread is asked to stop or scanning has
+ * been disabled.
+ */
+static void kmmscand_do_scan(void)
+{
+	const unsigned long budget = READ_ONCE(kmmscand_mms_to_scan);
+	unsigned long rounds = 0;
+
+	do {
+		/* Give other tasks a chance between rounds */
+		cond_resched();
+
+		if (unlikely(kthread_should_stop()))
+			return;
+
+		if (!READ_ONCE(kmmscand_scan_enabled))
+			return;
+
+		if (kmmscand_has_work())
+			kmmscand_scan_mm_slot();
+
+		kmmscand_migrate_folio();
+	} while (++rounds < budget);
+}
+
+/*
+ * Main function of the kmmscand kernel thread.
+ *
+ * Repeats "scan and migrate, then pause" until kthread_should_stop() reports
+ * that the thread must exit. While scanning is disabled the thread only
+ * pauses, re-checking the enable flag after every pause.
+ *
+ * @none: unused thread argument.
+ *
+ * Always returns 0.
+ */
+static int kmmscand(void *none)
+{
+	while (likely(!kthread_should_stop())) {
+		kmmscand_do_scan();
+
+		/*
+		 * Idle while scanning is switched off. The stop request must be
+		 * checked here as well: otherwise kthread_stop() would wait
+		 * forever on a thread stuck in this loop.
+		 */
+		while (!READ_ONCE(kmmscand_scan_enabled) &&
+		       !kthread_should_stop()) {
+			cpu_relax();
+			kmmscand_wait_work();
+		}
+
+		kmmscand_wait_work();
+	}
+	return 0;
+}
+
+/*
+ * Start the kmmscand kernel thread unless it is already running.
+ *
+ * Serialised against stop_kmmscand() by kmmscand_mutex, which guard() drops
+ * automatically on every return path, so plain returns are used rather than
+ * a goto-based exit.
+ *
+ * Returns 0 on success or if the thread already exists, otherwise the error
+ * code reported by kthread_run().
+ */
+static int start_kmmscand(void)
+{
+	struct task_struct *thread;
+
+	guard(mutex)(&kmmscand_mutex);
+
+	/* Someone already succeeded in starting the daemon */
+	if (kmmscand_thread)
+		return 0;
+
+	thread = kthread_run(kmmscand, NULL, "kmmscand");
+	if (IS_ERR(thread)) {
+		pr_err("kmmscand: kthread_run(kmmscand) failed\n");
+		return PTR_ERR(thread);
+	}
+
+	/* Publish the thread only once it is known to be valid */
+	kmmscand_thread = thread;
+	pr_info("kmmscand: Successfully started kmmscand\n");
+
+	/*
+	 * The daemon sleeps in wait_event_timeout(), i.e. uninterruptibly, so
+	 * it has to be woken with wake_up(); wake_up_interruptible() would
+	 * skip it.
+	 */
+	if (!list_empty(&kmmscand_scan.mm_head))
+		wake_up(&kmmscand_wait);
+
+	return 0;
+}
+
+/*
+ * Stop the kmmscand kernel thread if one is running and forget its task
+ * pointer. Runs under kmmscand_mutex. Always returns 0.
+ */
+static int stop_kmmscand(void)
+{
+	guard(mutex)(&kmmscand_mutex);
+
+	/* Nothing to do when the daemon was never started or is already gone */
+	if (!kmmscand_thread)
+		return 0;
+
+	kthread_stop(kmmscand_thread);
+	kmmscand_thread = NULL;
+
+	return 0;
+}
+
+/*
+ * Initcall: bring up the kmmscand daemon.
+ *
+ * Returns 0 when the daemon was started. On failure, stop_kmmscand() is
+ * called before the error from start_kmmscand() is returned.
+ */
+static int __init kmmscand_init(void)
+{
+	int ret = start_kmmscand();
+
+	if (ret)
+		stop_kmmscand();
+
+	return ret;
+}
+subsys_initcall(kmmscand_init);
--
2.39.3