[PATCH v2] mm/vmalloc: Keep a separate lazy-free list

From: Chris Wilson
Date: Fri Apr 15 2016 - 07:08:18 EST


When mixing lots of vmallocs and set_memory_*() (which calls
vm_unmap_aliases()) I encountered situations where the performance
degraded severely due to the walking of the entire vmap_area list each
invocation. One simple improvement is to add the lazily freed vmap_area
to a separate lockless free list, such that we then avoid having to walk
the full list on each purge.

v2: Remove unused VM_LAZY_FREE and VM_LAZY_FREEING flags and reorder
access of vmap_area during addition to the lazy free list to avoid
use-after free (Roman).

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx>
Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
Cc: Roman Pen <r.peniaev@xxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Toshi Kani <toshi.kani@xxxxxx>
Cc: Shawn Lin <shawn.lin@xxxxxxxxxxxxxx>
Cc: linux-mm@xxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
include/linux/vmalloc.h | 3 ++-
mm/vmalloc.c | 40 ++++++++++++++++++++--------------------
2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 8b51df3ab334..3d9d786a943c 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -4,6 +4,7 @@
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/list.h>
+#include <linux/llist.h>
#include <asm/page.h> /* pgprot_t */
#include <linux/rbtree.h>

@@ -45,7 +46,7 @@ struct vmap_area {
unsigned long flags;
struct rb_node rb_node; /* address sorted rbtree */
struct list_head list; /* address sorted list */
- struct list_head purge_list; /* "lazy purge" list */
+ struct llist_node purge_list; /* "lazy purge" list */
struct vm_struct *vm;
struct rcu_head rcu_head;
};
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 293889d7f482..70f942832164 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -21,6 +21,7 @@
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
+#include <linux/llist.h>
#include <linux/notifier.h>
#include <linux/rbtree.h>
#include <linux/radix-tree.h>
@@ -275,13 +276,12 @@ EXPORT_SYMBOL(vmalloc_to_pfn);

/*** Global kva allocator ***/

-#define VM_LAZY_FREE 0x01
-#define VM_LAZY_FREEING 0x02
#define VM_VM_AREA 0x04

static DEFINE_SPINLOCK(vmap_area_lock);
/* Export for kexec only */
LIST_HEAD(vmap_area_list);
+static LLIST_HEAD(vmap_purge_list);
static struct rb_root vmap_area_root = RB_ROOT;

/* The vmap cache globals are protected by vmap_area_lock */
@@ -628,7 +628,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
int sync, int force_flush)
{
static DEFINE_SPINLOCK(purge_lock);
- LIST_HEAD(valist);
+ struct llist_node *valist;
struct vmap_area *va;
struct vmap_area *n_va;
int nr = 0;
@@ -647,20 +647,14 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
if (sync)
purge_fragmented_blocks_allcpus();

- rcu_read_lock();
- list_for_each_entry_rcu(va, &vmap_area_list, list) {
- if (va->flags & VM_LAZY_FREE) {
- if (va->va_start < *start)
- *start = va->va_start;
- if (va->va_end > *end)
- *end = va->va_end;
- nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
- list_add_tail(&va->purge_list, &valist);
- va->flags |= VM_LAZY_FREEING;
- va->flags &= ~VM_LAZY_FREE;
- }
+ valist = llist_del_all(&vmap_purge_list);
+ llist_for_each_entry(va, valist, purge_list) {
+ if (va->va_start < *start)
+ *start = va->va_start;
+ if (va->va_end > *end)
+ *end = va->va_end;
+ nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
}
- rcu_read_unlock();

if (nr)
atomic_sub(nr, &vmap_lazy_nr);
@@ -670,7 +664,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,

if (nr) {
spin_lock(&vmap_area_lock);
- list_for_each_entry_safe(va, n_va, &valist, purge_list)
+ llist_for_each_entry_safe(va, n_va, valist, purge_list)
__free_vmap_area(va);
spin_unlock(&vmap_area_lock);
}
@@ -705,9 +699,15 @@ static void purge_vmap_area_lazy(void)
*/
static void free_vmap_area_noflush(struct vmap_area *va)
{
- va->flags |= VM_LAZY_FREE;
- atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
- if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
+ int nr_lazy;
+
+ nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT,
+ &vmap_lazy_nr);
+
+ /* After this point, we may free va at any time */
+ llist_add(&va->purge_list, &vmap_purge_list);
+
+ if (unlikely(nr_lazy > lazy_max_pages()))
try_purge_vmap_area_lazy();
}

--
2.8.0.rc3