RE: [patch] mm, slab: faster active and free stats

From: 김준수/선임연구원/SW Platform(연)AOT팀(iamjoonsoo.kim@lge.com)
Date: Fri Dec 02 2016 - 03:03:09 EST


Hello, David.

There is some problem on my e-mail client so I have to use another one.
Please understand broken reply style.

Yeah, I like this version much. Can we do account slabs_free directly in get_first_slab()
and get_valid_first_slab()? Passing page_is_free isn't needed if we do it directly in
those functions.

One nitpick is that if we don't replace variable name, num_slabs with total_slabs, we will
get less churn the code. However, total_slabs looks better than num_slabs.

Thanks.

-----Original Message-----
From: David Rientjes [mailto:rientjes@xxxxxxxxxx]
Sent: Wednesday, November 30, 2016 9:57 AM
To: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>; Greg Thelen <gthelen@xxxxxxxxxx>; Aruna Ramakrishna <aruna.ramakrishna@xxxxxxxxxx>; Christoph Lameter <cl@xxxxxxxxx>; linux-kernel@xxxxxxxxxxxxxxx; linux-mm@xxxxxxxxx
Subject: Re: [patch] mm, slab: faster active and free stats

On Mon, 28 Nov 2016, Joonsoo Kim wrote:

> Hello,
>
> Sorry for long delay.
> I agree that this improvement is needed. Could you try the approach
> that maintains n->num_slabs and n->free_slabs? I guess that it would
> be simpler than this patch so more maintainable.
>

Ok, what do you think about the following? I'm not sure it's that much more simpler.


mm, slab: track total number of slabs instead of active slabs

Rather than tracking the number of active slabs for each node, track the
total number of slabs. This is a minor improvement that avoids active
slab tracking when a slab goes from free to partial or partial to free.

Suggested-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
Signed-off-by: David Rientjes <rientjes@xxxxxxxxxx>
---
mm/slab.c | 48 +++++++++++++++++++++---------------------------
mm/slab.h | 4 ++--
2 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -227,7 +227,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
INIT_LIST_HEAD(&parent->slabs_full);
INIT_LIST_HEAD(&parent->slabs_partial);
INIT_LIST_HEAD(&parent->slabs_free);
- parent->active_slabs = 0;
+ parent->total_slabs = 0;
parent->free_slabs = 0;
parent->shared = NULL;
parent->alien = NULL;
@@ -1381,20 +1381,18 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
cachep->name, cachep->size, cachep->gfporder);

for_each_kmem_cache_node(cachep, node, n) {
- unsigned long active_objs = 0, free_objs = 0;
- unsigned long active_slabs, num_slabs;
+ unsigned long total_slabs, free_slabs, free_objs;

spin_lock_irqsave(&n->list_lock, flags);
- active_slabs = n->active_slabs;
- num_slabs = active_slabs + n->free_slabs;
-
- active_objs += (num_slabs * cachep->num) - n->free_objects;
- free_objs += n->free_objects;
+ total_slabs = n->total_slabs;
+ free_slabs = n->free_slabs;
+ free_objs = n->free_objects;
spin_unlock_irqrestore(&n->list_lock, flags);

- pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
- node, active_slabs, num_slabs, active_objs,
- num_slabs * cachep->num, free_objs);
+ pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
+ node, total_slabs - free_slabs, total_slabs,
+ (total_slabs * cachep->num) - free_objs,
+ total_slabs * cachep->num);
}
#endif
}
@@ -2307,6 +2305,7 @@ static int drain_freelist(struct kmem_cache *cache,
page = list_entry(p, struct page, lru);
list_del(&page->lru);
n->free_slabs--;
+ n->total_slabs--;
/*
* Safe to drop the lock. The slab is no longer linked
* to the cache.
@@ -2741,13 +2740,12 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
n = get_node(cachep, page_to_nid(page));

spin_lock(&n->list_lock);
+ n->total_slabs++;
if (!page->active) {
list_add_tail(&page->lru, &(n->slabs_free));
n->free_slabs++;
- } else {
+ } else
fixup_slab_list(cachep, n, page, &list);
- n->active_slabs++;
- }

STATS_INC_GROWN(cachep);
n->free_objects += cachep->num - page->active;
@@ -2935,10 +2933,8 @@ static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
if (sk_memalloc_socks())
page = get_valid_first_slab(n, page, &page_is_free, pfmemalloc);

- if (page && page_is_free) {
- n->active_slabs++;
+ if (page && page_is_free)
n->free_slabs--;
- }

return page;
}
@@ -3441,7 +3437,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
if (page->active == 0) {
list_add(&page->lru, &n->slabs_free);
n->free_slabs++;
- n->active_slabs--;
} else {
/* Unconditionally move a slab to the end of the
* partial list on free - maximum time for the
@@ -3457,6 +3452,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
page = list_last_entry(&n->slabs_free, struct page, lru);
list_move(&page->lru, list);
n->free_slabs--;
+ n->total_slabs--;
}
}

@@ -4109,8 +4105,8 @@ static void cache_reap(struct work_struct *w)
void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
{
unsigned long active_objs, num_objs, active_slabs;
- unsigned long num_slabs = 0, free_objs = 0, shared_avail = 0;
- unsigned long num_slabs_free = 0;
+ unsigned long total_slabs = 0, free_objs = 0, shared_avail = 0;
+ unsigned long free_slabs = 0;
int node;
struct kmem_cache_node *n;

@@ -4118,9 +4114,8 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
check_irq_on();
spin_lock_irq(&n->list_lock);

- num_slabs += n->active_slabs + n->free_slabs;
- num_slabs_free += n->free_slabs;
-
+ total_slabs += n->total_slabs;
+ free_slabs += n->free_slabs;
free_objs += n->free_objects;

if (n->shared)
@@ -4128,15 +4123,14 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)

spin_unlock_irq(&n->list_lock);
}
- num_objs = num_slabs * cachep->num;
- active_slabs = num_slabs - num_slabs_free;
-
+ num_objs = total_slabs * cachep->num;
+ active_slabs = total_slabs - free_slabs;
active_objs = num_objs - free_objs;

sinfo->active_objs = active_objs;
sinfo->num_objs = num_objs;
sinfo->active_slabs = active_slabs;
- sinfo->num_slabs = num_slabs;
+ sinfo->num_slabs = total_slabs;
sinfo->shared_avail = shared_avail;
sinfo->limit = cachep->limit;
sinfo->batchcount = cachep->batchcount;
diff --git a/mm/slab.h b/mm/slab.h
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -432,8 +432,8 @@ struct kmem_cache_node {
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
- unsigned long active_slabs; /* length of slabs_partial+slabs_full */
- unsigned long free_slabs; /* length of slabs_free */
+ unsigned long total_slabs; /* length of all slab lists */
+ unsigned long free_slabs; /* length of free slab list only */
unsigned long free_objects;
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */