[patch 01/20] convert anon_vma list lock to a read/write lock

From: Rik van Riel
Date: Tue Dec 18 2007 - 16:17:50 EST


Make the anon_vma list lock a read/write lock. Heaviest use of this
lock is in the page_referenced()/try_to_unmap() calls from vmscan
[shrink_page_list()]. These functions can use a read lock to allow
some parallelism for different cpus trying to reclaim pages mapped
via the same set of vmas.

This change should not alter the footprint of the anon_vma in the
non-debug case.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@xxxxxx>
Signed-off-by: Rik van Riel <riel@xxxxxxxxxx>

Index: Linux/include/linux/rmap.h
===================================================================
--- Linux.orig/include/linux/rmap.h 2007-11-28 10:54:36.000000000 -0500
+++ Linux/include/linux/rmap.h 2007-11-28 11:12:37.000000000 -0500
@@ -25,7 +25,7 @@
* pointing to this anon_vma once its vma list is empty.
*/
struct anon_vma {
- spinlock_t lock; /* Serialize access to vma list */
+ rwlock_t rwlock; /* Serialize access to vma list */
struct list_head head; /* List of private "related" vmas */
};

@@ -43,18 +43,21 @@ static inline void anon_vma_free(struct
kmem_cache_free(anon_vma_cachep, anon_vma);
}

+/*
+ * This needs to be a write lock for __vma_link()
+ */
static inline void anon_vma_lock(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
- spin_lock(&anon_vma->lock);
+ write_lock(&anon_vma->rwlock);
}

static inline void anon_vma_unlock(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
- spin_unlock(&anon_vma->lock);
+ write_unlock(&anon_vma->rwlock);
}

/*
Index: Linux/mm/rmap.c
===================================================================
--- Linux.orig/mm/rmap.c 2007-11-28 10:54:37.000000000 -0500
+++ Linux/mm/rmap.c 2007-11-28 11:12:37.000000000 -0500
@@ -25,7 +25,7 @@
* mm->mmap_sem
* page->flags PG_locked (lock_page)
* mapping->i_mmap_lock
- * anon_vma->lock
+ * anon_vma->rwlock
* mm->page_table_lock or pte_lock
* zone->lru_lock (in mark_page_accessed, isolate_lru_page)
* swap_lock (in swap_duplicate, swap_info_get)
@@ -68,7 +68,7 @@ int anon_vma_prepare(struct vm_area_stru
if (anon_vma) {
allocated = NULL;
locked = anon_vma;
- spin_lock(&locked->lock);
+ write_lock(&locked->rwlock);
} else {
anon_vma = anon_vma_alloc();
if (unlikely(!anon_vma))
@@ -87,7 +87,7 @@ int anon_vma_prepare(struct vm_area_stru
spin_unlock(&mm->page_table_lock);

if (locked)
- spin_unlock(&locked->lock);
+ write_unlock(&locked->rwlock);
if (unlikely(allocated))
anon_vma_free(allocated);
}
@@ -113,9 +113,9 @@ void anon_vma_link(struct vm_area_struct
struct anon_vma *anon_vma = vma->anon_vma;

if (anon_vma) {
- spin_lock(&anon_vma->lock);
+ write_lock(&anon_vma->rwlock);
list_add_tail(&vma->anon_vma_node, &anon_vma->head);
- spin_unlock(&anon_vma->lock);
+ write_unlock(&anon_vma->rwlock);
}
}

@@ -127,12 +127,12 @@ void anon_vma_unlink(struct vm_area_stru
if (!anon_vma)
return;

- spin_lock(&anon_vma->lock);
+ write_lock(&anon_vma->rwlock);
list_del(&vma->anon_vma_node);

/* We must garbage collect the anon_vma if it's empty */
empty = list_empty(&anon_vma->head);
- spin_unlock(&anon_vma->lock);
+ write_unlock(&anon_vma->rwlock);

if (empty)
anon_vma_free(anon_vma);
@@ -142,7 +142,7 @@ static void anon_vma_ctor(struct kmem_ca
{
struct anon_vma *anon_vma = data;

- spin_lock_init(&anon_vma->lock);
+ rwlock_init(&anon_vma->rwlock);
INIT_LIST_HEAD(&anon_vma->head);
}

@@ -169,7 +169,7 @@ static struct anon_vma *page_lock_anon_v
goto out;

anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
- spin_lock(&anon_vma->lock);
+ read_lock(&anon_vma->rwlock);
return anon_vma;
out:
rcu_read_unlock();
@@ -178,7 +178,7 @@ out:

static void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
- spin_unlock(&anon_vma->lock);
+ read_unlock(&anon_vma->rwlock);
rcu_read_unlock();
}

Index: Linux/mm/mmap.c
===================================================================
--- Linux.orig/mm/mmap.c 2007-11-28 10:54:36.000000000 -0500
+++ Linux/mm/mmap.c 2007-11-28 11:12:37.000000000 -0500
@@ -564,7 +564,7 @@ again: remove_next = 1 + (end > next->
if (vma->anon_vma)
anon_vma = vma->anon_vma;
if (anon_vma) {
- spin_lock(&anon_vma->lock);
+ write_lock(&anon_vma->rwlock);
/*
* Easily overlooked: when mprotect shifts the boundary,
* make sure the expanding vma has anon_vma set if the
@@ -618,7 +618,7 @@ again: remove_next = 1 + (end > next->
}

if (anon_vma)
- spin_unlock(&anon_vma->lock);
+ write_unlock(&anon_vma->rwlock);
if (mapping)
spin_unlock(&mapping->i_mmap_lock);

Index: Linux/mm/migrate.c
===================================================================
--- Linux.orig/mm/migrate.c 2007-11-28 10:54:36.000000000 -0500
+++ Linux/mm/migrate.c 2007-11-28 11:12:37.000000000 -0500
@@ -229,12 +229,12 @@ static void remove_anon_migration_ptes(s
* We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
*/
anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
- spin_lock(&anon_vma->lock);
+ read_lock(&anon_vma->rwlock);

list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
remove_migration_pte(vma, old, new);

- spin_unlock(&anon_vma->lock);
+ read_unlock(&anon_vma->rwlock);
}

/*

--
All Rights Reversed

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/