Re: REGRESSION: Performance regressions from switching anon_vma->lock to mutex

From: Linus Torvalds
Date: Thu Jun 16 2011 - 18:10:23 EST


On Thu, Jun 16, 2011 at 2:05 PM, Linus Torvalds
<torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>
> This patch is UNTESTED!

It was also UNATTACHED!

Now it's attached.

Linus
mm/rmap.c | 51 +++++++++++++++++++++++++++++++++++----------------
1 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 0eb463ea88dd..d1d26900c082 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -208,13 +208,11 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
avc->anon_vma = anon_vma;
list_add(&avc->same_vma, &vma->anon_vma_chain);

- anon_vma_lock(anon_vma);
/*
* It's critical to add new vmas to the tail of the anon_vma,
* see comment in huge_memory.c:__split_huge_page().
*/
list_add_tail(&avc->same_anon_vma, &anon_vma->head);
- anon_vma_unlock(anon_vma);
}

/*
@@ -224,16 +222,30 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
struct anon_vma_chain *avc, *pavc;
+ struct anon_vma *root = NULL;

list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
+ struct anon_vma *anon_vma = pavc->anon_vma, *new_root = anon_vma->root;
+
+ if (new_root != root) {
+ if (WARN_ON_ONCE(root))
+ mutex_unlock(&root->mutex);
+ root = new_root;
+ mutex_lock(&root->mutex);
+ }
+
avc = anon_vma_chain_alloc();
if (!avc)
goto enomem_failure;
anon_vma_chain_link(dst, avc, pavc->anon_vma);
}
+ if (root)
+ mutex_unlock(&root->mutex);
return 0;

enomem_failure:
+ if (root)
+ mutex_unlock(&root->mutex);
unlink_anon_vmas(dst);
return -ENOMEM;
}
@@ -280,7 +292,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
get_anon_vma(anon_vma->root);
/* Mark this anon_vma as the one where our new (COWed) pages go. */
vma->anon_vma = anon_vma;
+ anon_vma_lock(anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
+ anon_vma_unlock(anon_vma);

return 0;

@@ -291,39 +305,44 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
return -ENOMEM;
}

-static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
+static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain, struct anon_vma *anon_vma)
{
- struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
- int empty;
-
- /* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
- if (!anon_vma)
- return;
-
- anon_vma_lock(anon_vma);
list_del(&anon_vma_chain->same_anon_vma);

/* We must garbage collect the anon_vma if it's empty */
- empty = list_empty(&anon_vma->head);
- anon_vma_unlock(anon_vma);
-
- if (empty)
+ if (list_empty(&anon_vma->head))
put_anon_vma(anon_vma);
}

void unlink_anon_vmas(struct vm_area_struct *vma)
{
struct anon_vma_chain *avc, *next;
+ struct anon_vma *root = NULL;

/*
* Unlink each anon_vma chained to the VMA. This list is ordered
* from newest to oldest, ensuring the root anon_vma gets freed last.
*/
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
- anon_vma_unlink(avc);
+ struct anon_vma *anon_vma = avc->anon_vma;
+
+ /* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
+ if (anon_vma) {
+ struct anon_vma *new_root = anon_vma->root;
+
+ if (new_root != root) {
+ if (WARN_ON_ONCE(root))
+ mutex_unlock(&root->mutex);
+ root = new_root;
+ mutex_lock(&root->mutex);
+ }
+ anon_vma_unlink(avc, anon_vma);
+ }
list_del(&avc->same_vma);
anon_vma_chain_free(avc);
}
+ if (root)
+ mutex_unlock(&root->mutex);
}

static void anon_vma_ctor(void *data)