[RFC PATCH V0 DO NOT MERGE 10/10] kmmscand: Add scanning

From: Raghavendra K T
Date: Sun Dec 01 2024 - 10:40:58 EST


overhead calculation support

Intended to be used only for experimental purposes.
Not to be merged.

Signed-off-by: Raghavendra K T <raghavendra.kt@xxxxxxx>
---
include/linux/mm.h | 3 +++
include/linux/vm_event_item.h | 4 ++++
kernel/sched/fair.c | 13 ++++++++-----
mm/huge_memory.c | 1 +
mm/kmmscand.c | 9 +++++++++
mm/memory.c | 12 ++++++++----
mm/vmstat.c | 4 ++++
7 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 306452c11d31..7380aab1fa62 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -600,6 +600,7 @@ struct vm_fault {
* page table to avoid allocation from
* atomic context.
*/
+ unsigned long start_time;
};

/*
@@ -690,6 +691,8 @@ void count_kmmscand_migrate_failed(void);
void count_kmmscand_slowtier(void);
void count_kmmscand_toptier(void);
void count_kmmscand_idlepage(void);
+void count_kmmscand_scan_oh(long delta);
+void count_kmmscand_migration_oh(long delta);
#endif

#ifdef CONFIG_NUMA_BALANCING
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index b2ccd4f665aa..4c7eaea01f13 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -64,6 +64,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
NUMA_HINT_FAULTS,
NUMA_HINT_FAULTS_LOCAL,
NUMA_PAGE_MIGRATE,
+ NUMA_TASK_WORK_OH,
+ NUMA_HF_MIGRATION_OH,
#endif
#ifdef CONFIG_KMMSCAND
KMMSCAND_MM_SCANS,
@@ -74,6 +76,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
KMMSCAND_SLOWTIER,
KMMSCAND_TOPTIER,
KMMSCAND_IDLEPAGE,
+ KMMSCAND_SCAN_OH,
+ KMMSCAND_MIGRATION_OH,
#endif
#ifdef CONFIG_MIGRATION
PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fbdca89c677f..d205be30ae6c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3299,6 +3299,7 @@ static void task_numa_work(struct callback_head *work)
struct vma_iterator vmi;
bool vma_pids_skipped;
bool vma_pids_forced = false;
+ unsigned long old = jiffies;

SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work));

@@ -3312,7 +3313,7 @@ static void task_numa_work(struct callback_head *work)
* work.
*/
if (p->flags & PF_EXITING)
- return;
+ goto out1;

if (!mm->numa_next_scan) {
mm->numa_next_scan = now +
@@ -3324,7 +3325,7 @@ static void task_numa_work(struct callback_head *work)
*/
migrate = mm->numa_next_scan;
if (time_before(now, migrate))
- return;
+ goto out1;

if (p->numa_scan_period == 0) {
p->numa_scan_period_max = task_scan_max(p);
@@ -3333,7 +3334,7 @@ static void task_numa_work(struct callback_head *work)

next_scan = now + msecs_to_jiffies(p->numa_scan_period);
if (!try_cmpxchg(&mm->numa_next_scan, &migrate, next_scan))
- return;
+ goto out1;

/*
* Delay this task enough that another task of this mm will likely win
@@ -3345,11 +3346,11 @@ static void task_numa_work(struct callback_head *work)
pages <<= 20 - PAGE_SHIFT; /* MB in pages */
virtpages = pages * 8; /* Scan up to this much virtual space */
if (!pages)
- return;
+ goto out1;


if (!mmap_read_trylock(mm))
- return;
+ goto out1;

/*
* VMAs are skipped if the current PID has not trapped a fault within
@@ -3526,6 +3527,8 @@ static void task_numa_work(struct callback_head *work)
u64 diff = p->se.sum_exec_runtime - runtime;
p->node_stamp += 32 * diff;
}
+out1:
+ __count_vm_events(NUMA_TASK_WORK_OH, jiffies_to_usecs(jiffies - old));
}

void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ee335d96fc39..d948d1fbbffd 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1250,6 +1250,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)
spin_unlock(vmf->ptl);
}

+ __count_vm_events(NUMA_HF_MIGRATION_OH, jiffies_to_usecs(jiffies - vmf->start_time));
return 0;
unlock_release:
spin_unlock(vmf->ptl);
diff --git a/mm/kmmscand.c b/mm/kmmscand.c
index 70f588a210dd..bd2c65f38da2 100644
--- a/mm/kmmscand.c
+++ b/mm/kmmscand.c
@@ -644,8 +644,10 @@ static void kmmscand_cleanup_migration_list(struct mm_struct *mm)
static void kmmscand_migrate_folio(void)
{
int ret = 0;
+ unsigned long tstart, tend;
struct kmmscand_migrate_info *info, *tmp;

+ tstart = jiffies;
spin_lock(&kmmscand_migrate_lock);

if (!list_empty(&kmmscand_migrate_list.migrate_head)) {
@@ -691,6 +693,8 @@ static void kmmscand_migrate_folio(void)
}
}
spin_unlock(&kmmscand_migrate_lock);
+ tend = jiffies;
+ __count_vm_events(KMMSCAND_MIGRATION_OH, jiffies_to_usecs(tend - tstart));
}

/*
@@ -788,6 +792,8 @@ static unsigned long kmmscand_scan_mm_slot(void)

unsigned int mm_slot_scan_period;
unsigned long now;
+
+ unsigned long tstart, tend;
unsigned long mm_slot_next_scan;
unsigned long mm_slot_scan_size;
unsigned long scanned_size = 0;
@@ -800,6 +806,7 @@ static unsigned long kmmscand_scan_mm_slot(void)
struct vm_area_struct *vma = NULL;
struct kmmscand_mm_slot *mm_slot;

+ tstart = jiffies;
/* Retrieve mm */
spin_lock(&kmmscand_mm_lock);

@@ -917,6 +924,8 @@ static unsigned long kmmscand_scan_mm_slot(void)
}

spin_unlock(&kmmscand_mm_lock);
+ tend = jiffies;
+ __count_vm_events(KMMSCAND_SCAN_OH, jiffies_to_usecs(tend - tstart));
return total;
}

diff --git a/mm/memory.c b/mm/memory.c
index 75c2dfd04f72..baea436124b0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5590,7 +5590,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)

if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
- return 0;
+ goto out;
}

pte = pte_modify(old_pte, vma->vm_page_prot);
@@ -5629,17 +5629,18 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
nid = target_nid;
flags |= TNF_MIGRATED;
task_numa_fault(last_cpupid, nid, nr_pages, flags);
- return 0;
+ goto out;
}

flags |= TNF_MIGRATE_FAIL;
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
if (unlikely(!vmf->pte))
- return 0;
+ goto out;
+
if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
- return 0;
+ goto out;
}
out_map:
/*
@@ -5656,6 +5657,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)

if (nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, nid, nr_pages, flags);
+out:
+ __count_vm_events(NUMA_HF_MIGRATION_OH, jiffies_to_usecs(jiffies - vmf->start_time));
return 0;
}

@@ -5858,6 +5861,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
.flags = flags,
.pgoff = linear_page_index(vma, address),
.gfp_mask = __get_fault_gfp_mask(vma),
+ .start_time = jiffies,
};
struct mm_struct *mm = vma->vm_mm;
unsigned long vm_flags = vma->vm_flags;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d758e7155042..b7fe51342970 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1339,6 +1339,8 @@ const char * const vmstat_text[] = {
"numa_hint_faults",
"numa_hint_faults_local",
"numa_pages_migrated",
+ "numa_task_work_oh",
+ "numa_hf_migration_oh",
#endif
#ifdef CONFIG_KMMSCAND
"nr_kmmscand_mm_scans",
@@ -1349,6 +1351,8 @@ const char * const vmstat_text[] = {
"nr_kmmscand_slowtier",
"nr_kmmscand_toptier",
"nr_kmmscand_idlepage",
+ "kmmscand_scan_oh",
+ "kmmscand_migration_oh",
#endif
#ifdef CONFIG_MIGRATION
"pgmigrate_success",
--
2.39.3