Re: [PATCH] slub: Discard slab page only when node partials > minimum setting

From: Christoph Lameter
Date: Thu Sep 29 2011 - 10:32:34 EST


On Thu, 29 Sep 2011, Alex Shi wrote:

> > Is it really worth it? The higher the value, the more memory can
> > potentially be stuck in the per-cpu partial pages.
>
> It is hard to find the best balance. :)

Well then, let's err on the side of smaller memory use for now.
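
To put a rough number on the memory that can get stuck: a back-of-envelope
sketch, where the CPU count, the cpu_partial value, and the slab size are
all assumptions I picked for illustration, not measurements:

/*
 * Worst case: every slab parked on a per-cpu partial list contributes
 * only one free object toward the cpu_partial limit, so the object
 * limit translates into roughly cpu_partial whole slabs per cpu.
 */
#include <stdio.h>

int main(void)
{
	unsigned long cpus = 64;		/* assumed machine size */
	unsigned long cpu_partial = 30;		/* assumed s->cpu_partial */
	unsigned long slab_bytes = 4096;	/* assumed order-0 slabs */
	unsigned long bytes = cpus * cpu_partial * slab_bytes;

	printf("~%lu KiB per cache held off the node partial lists\n",
	       bytes >> 10);
	return 0;
}

With those numbers that is on the order of 7-8 MiB per cache in the worst
case, which is why a smaller value looks like the safer default.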

> I tested aim9/netperf; both of them are said to be related to memory
> allocation, but I didn't find any performance change with/without PCP.
> Only hackbench seems sensitive to this. As for aim9, whether with our own
> configuration or with Mel Gorman's aim9 configuration from his mmtests,
> neither shows a clear performance change for PCP slub.

AIM9 tests are usually single-threaded, so I would not expect any
differences. Try AIM7? And concurrent netperfs?

The PCP patch helps only if there is node lock contention, meaning
simultaneous allocations/frees from multiple processors on the same
cache.
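
If you want to reproduce that pattern outside of hackbench, something like
the sketch below should do: one kthread pinned per CPU, all allocating and
freeing batches from the same cache. All names and sizes in it are made up
for illustration, and it does no measurement itself; watch the node
list_lock and the slab paths with perf while it runs.

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/cpu.h>
#include <linux/err.h>

#define STRESS_BATCH 64	/* enough objects to span several slabs */

static struct kmem_cache *stress_cache;
static struct task_struct *workers[NR_CPUS];

static int stress_fn(void *unused)
{
	void *objs[STRESS_BATCH];
	int i;

	while (!kthread_should_stop()) {
		/* Fill several slabs' worth of objects from the shared cache. */
		for (i = 0; i < STRESS_BATCH; i++)
			objs[i] = kmem_cache_alloc(stress_cache, GFP_KERNEL);
		/*
		 * Free them all again; frees that miss the current cpu slab
		 * go through __slab_free() and the partial lists.
		 */
		for (i = 0; i < STRESS_BATCH; i++)
			if (objs[i])
				kmem_cache_free(stress_cache, objs[i]);
		cond_resched();
	}
	return 0;
}

static int __init slub_stress_init(void)
{
	int cpu;

	stress_cache = kmem_cache_create("slub_stress", 256, 0, 0, NULL);
	if (!stress_cache)
		return -ENOMEM;

	for_each_online_cpu(cpu) {
		struct task_struct *t;

		t = kthread_create(stress_fn, NULL, "slub_stress/%d", cpu);
		if (IS_ERR(t))
			continue;
		kthread_bind(t, cpu);	/* one worker per CPU */
		workers[cpu] = t;
		wake_up_process(t);
	}
	return 0;
}

static void __exit slub_stress_exit(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		if (workers[cpu])
			kthread_stop(workers[cpu]);
	kmem_cache_destroy(stress_cache);
}

module_init(slub_stress_init);
module_exit(slub_stress_exit);
MODULE_LICENSE("GPL");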

> Checking the kernel function call graph via perf record/perf report, the
> slab functions only show up heavily in the hackbench benchmark.

Then the question arises whether it is worthwhile to merge this if it only
affects this benchmark.

> The above is what I did this week for PCP.
>
> BTW, I will take my one-week holiday starting tomorrow; e-mail access will
> be slow.

Have a nice holiday.

diff --git a/mm/slub.c b/mm/slub.c
index 492beab..372f219 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1613,7 +1613,7 @@ static void *get_partial_node(struct kmem_cache *s,
 	spin_lock(&n->list_lock);
 	list_for_each_entry_safe(page, page2, &n->partial, lru) {
 		void *t = acquire_slab(s, n, page, object == NULL);
-		int available;
+		int available = 1;
 
 		if (!t)
 			continue;
@@ -1623,12 +1623,14 @@ static void *get_partial_node(struct kmem_cache *s,
 			c->node = page_to_nid(page);
 			stat(s, ALLOC_FROM_PARTIAL);
 			object = t;
-			available = page->objects - page->inuse;
 		} else {
 			page->freelist = t;
 			available = put_cpu_partial(s, page, 0);
+			if (!available)
+				add_partial(n, page, 0);
+
 		}
-		if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
+		if (kmem_cache_debug(s) || !available)
 			break;
 
 	}
@@ -2017,17 +2019,10 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 		if (oldpage) {
 			pobjects = oldpage->pobjects;
 			pages = oldpage->pages;
-			if (drain && pobjects > s->cpu_partial) {
-				unsigned long flags;
-				/*
-				 * partial array is full. Move the existing
-				 * set to the per node partial list.
-				 */
-				local_irq_save(flags);
-				unfreeze_partials(s);
-				local_irq_restore(flags);
+			if (pobjects > s->cpu_partial) {
 				pobjects = 0;
-				pages = 0;
+				page->frozen = 0;
+				break;
 			}
 		}
 
@@ -2039,7 +2034,10 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 		page->next = oldpage;
 
 	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
-	stat(s, CPU_PARTIAL_FREE);
+
+	if (pobjects)
+		stat(s, CPU_PARTIAL_FREE);
+
 	return pobjects;
 }
 
@@ -2472,6 +2470,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		new.inuse--;
 		if ((!new.inuse || !prior) && !was_frozen && !n) {
 
+
 			if (!kmem_cache_debug(s) && !prior)
 
 				/*
@@ -2482,7 +2481,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 
 			else { /* Needs to be taken off a list */
 
-	                        n = get_node(s, page_to_nid(page));
+				n = get_node(s, page_to_nid(page));
 				/*
 				 * Speculatively acquire the list_lock.
 				 * If the cmpxchg does not succeed then we may
@@ -2492,8 +2491,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 				 * other processors updating the list of slabs.
 				 */
 				spin_lock_irqsave(&n->list_lock, flags);
-
 			}
+
 		}
 		inuse = new.inuse;
 
@@ -2503,23 +2502,23 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		"__slab_free"));
 
 	if (likely(!n)) {
-
-		/*
-		 * If we just froze the page then put it onto the
-		 * per cpu partial list.
-		 */
 		if (new.frozen && !was_frozen)
-			put_cpu_partial(s, page, 1);
+			if (!put_cpu_partial(s, page, 1)) {
+				n = get_node(s, page_to_nid(page));
+				spin_lock_irqsave(&n->list_lock, flags);
+				goto get_lock;
+			}
 
 		/*
 		 * The list lock was not taken therefore no list
 		 * activity can be necessary.
 		 */
-                if (was_frozen)
-                        stat(s, FREE_FROZEN);
-                return;
-        }
+		if (was_frozen)
+			stat(s, FREE_FROZEN);
+		return;
+	}
 
+get_lock:
 	/*
 	 * was_frozen may have been set after we acquired the list_lock in
 	 * an earlier loop. So we need to check it here again.
@@ -2536,7 +2535,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		 */
 		if (unlikely(!prior)) {
 			remove_full(s, page);
-			add_partial(n, page, 0);
+			add_partial(n, page, 1);
 			stat(s, FREE_ADD_PARTIAL);
 		}
 	}