[PATCH 7/9] mm: vmalloc: Insert lazy-VA per-cpu zone

From: Uladzislau Rezki (Sony)
Date: Mon May 22 2023 - 07:15:18 EST


Similar to busy VAs, lazy ones are also stored per CPU
zone. A freed address is converted into the zone it
belongs to and resides there for further handling.

Such an approach does not require any global locking
primitive; instead, access scales with the number of
CPUs.

This patch removes the global purge_vmap_area_lock, the
global purge_vmap_area_root tree and the global
purge_vmap_area_list.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@xxxxxxxxx>
---
mm/vmalloc.c | 127 ++++++++++++++++++++++++++++-----------------------
1 file changed, 71 insertions(+), 56 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index dd83deb5ef4f..fe993c0561dd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -734,10 +734,6 @@ static DEFINE_SPINLOCK(free_vmap_area_lock);
LIST_HEAD(vmap_area_list);
static bool vmap_initialized __read_mostly;

-static struct rb_root purge_vmap_area_root = RB_ROOT;
-static LIST_HEAD(purge_vmap_area_list);
-static DEFINE_SPINLOCK(purge_vmap_area_lock);
-
/*
* This kmem_cache is used for vmap_area objects. Instead of
* allocating from slab we reuse an object from this cache to
@@ -1792,39 +1788,17 @@ static DEFINE_MUTEX(vmap_purge_lock);
/* for per-CPU blocks */
static void purge_fragmented_blocks_allcpus(void);

-/*
- * Purges all lazily-freed vmap areas.
- */
-static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
+static unsigned long
+purge_cpu_vmap_zone(struct cpu_vmap_zone *z)
{
- unsigned long resched_threshold;
- unsigned int num_purged_areas = 0;
- struct list_head local_purge_list;
+ unsigned long num_purged_areas = 0;
struct vmap_area *va, *n_va;

- lockdep_assert_held(&vmap_purge_lock);
-
- spin_lock(&purge_vmap_area_lock);
- purge_vmap_area_root = RB_ROOT;
- list_replace_init(&purge_vmap_area_list, &local_purge_list);
- spin_unlock(&purge_vmap_area_lock);
-
- if (unlikely(list_empty(&local_purge_list)))
+ if (list_empty(&z->purge_list))
goto out;

- start = min(start,
- list_first_entry(&local_purge_list,
- struct vmap_area, list)->va_start);
-
- end = max(end,
- list_last_entry(&local_purge_list,
- struct vmap_area, list)->va_end);
-
- flush_tlb_kernel_range(start, end);
- resched_threshold = lazy_max_pages() << 1;
-
spin_lock(&free_vmap_area_lock);
- list_for_each_entry_safe(va, n_va, &local_purge_list, list) {
+ list_for_each_entry_safe(va, n_va, &z->purge_list, list) {
unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
unsigned long orig_start = va->va_start;
unsigned long orig_end = va->va_end;
@@ -1846,13 +1820,57 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)

atomic_long_sub(nr, &vmap_lazy_nr);
num_purged_areas++;
-
- if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
- cond_resched_lock(&free_vmap_area_lock);
}
spin_unlock(&free_vmap_area_lock);

out:
+ return num_purged_areas;
+}
+
+/*
+ * Purges all lazily-freed vmap areas.
+ */
+static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
+{
+ unsigned long num_purged_areas = 0;
+ struct cpu_vmap_zone *z;
+ int need_purge = 0;
+ int i;
+
+ lockdep_assert_held(&vmap_purge_lock);
+
+ for_each_possible_cpu(i) {
+ z = per_cpu_ptr(&cpu_vmap_zone, i);
+ INIT_LIST_HEAD(&z->purge_list);
+
+ if (RB_EMPTY_ROOT(&fbl_root(z, LAZY)))
+ continue;
+
+ fbl_lock(z, LAZY);
+ WRITE_ONCE(fbl(z, LAZY, root.rb_node), NULL);
+ list_replace_init(&fbl_head(z, LAZY), &z->purge_list);
+ fbl_unlock(z, LAZY);
+
+ start = min(start,
+ list_first_entry(&z->purge_list,
+ struct vmap_area, list)->va_start);
+
+ end = max(end,
+ list_last_entry(&z->purge_list,
+ struct vmap_area, list)->va_end);
+
+ need_purge++;
+ }
+
+ if (need_purge) {
+ flush_tlb_kernel_range(start, end);
+
+ for_each_possible_cpu(i) {
+ z = per_cpu_ptr(&cpu_vmap_zone, i);
+ num_purged_areas += purge_cpu_vmap_zone(z);
+ }
+ }
+
trace_purge_vmap_area_lazy(start, end, num_purged_areas);
return num_purged_areas > 0;
}
@@ -1870,16 +1888,9 @@ static void purge_vmap_area_lazy(void)

static void drain_vmap_area_work(struct work_struct *work)
{
- unsigned long nr_lazy;
-
- do {
- mutex_lock(&vmap_purge_lock);
- __purge_vmap_area_lazy(ULONG_MAX, 0);
- mutex_unlock(&vmap_purge_lock);
-
- /* Recheck if further work is required. */
- nr_lazy = atomic_long_read(&vmap_lazy_nr);
- } while (nr_lazy > lazy_max_pages());
+ mutex_lock(&vmap_purge_lock);
+ __purge_vmap_area_lazy(ULONG_MAX, 0);
+ mutex_unlock(&vmap_purge_lock);
}

/*
@@ -1889,6 +1900,7 @@ static void drain_vmap_area_work(struct work_struct *work)
*/
static void free_vmap_area_noflush(struct vmap_area *va)
{
+ struct cpu_vmap_zone *z = addr_to_cvz(va->va_start);
unsigned long nr_lazy_max = lazy_max_pages();
unsigned long va_start = va->va_start;
unsigned long nr_lazy;
@@ -1902,10 +1914,9 @@ static void free_vmap_area_noflush(struct vmap_area *va)
/*
* Merge or place it to the purge tree/list.
*/
- spin_lock(&purge_vmap_area_lock);
- merge_or_add_vmap_area(va,
- &purge_vmap_area_root, &purge_vmap_area_list);
- spin_unlock(&purge_vmap_area_lock);
+ fbl_lock(z, LAZY);
+ merge_or_add_vmap_area(va, &fbl_root(z, LAZY), &fbl_head(z, LAZY));
+ fbl_unlock(z, LAZY);

trace_free_vmap_area_noflush(va_start, nr_lazy, nr_lazy_max);

@@ -4199,17 +4210,21 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)

static void show_purge_info(struct seq_file *m)
{
+ struct cpu_vmap_zone *z;
struct vmap_area *va;
+ int i;

- mutex_lock(&vmap_purge_lock);
- spin_lock(&purge_vmap_area_lock);
- list_for_each_entry(va, &purge_vmap_area_list, list) {
- seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
- (void *)va->va_start, (void *)va->va_end,
- va->va_end - va->va_start);
+ for_each_possible_cpu(i) {
+ z = per_cpu_ptr(&cpu_vmap_zone, i);
+
+ fbl_lock(z, LAZY);
+ list_for_each_entry(va, &fbl_head(z, LAZY), list) {
+ seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
+ (void *)va->va_start, (void *)va->va_end,
+ va->va_end - va->va_start);
+ }
+ fbl_unlock(z, LAZY);
}
- spin_unlock(&purge_vmap_area_lock);
- mutex_unlock(&vmap_purge_lock);
}

static int s_show(struct seq_file *m, void *p)
--
2.30.2