[PATCH 1/2] SLUB: Out-of-memory diagnostics

From: Pekka J Enberg
Date: Thu Jun 11 2009 - 04:43:59 EST


From: Pekka Enberg <penberg@xxxxxxxxxxxxxx>

As suggested by Mel Gorman, add out-of-memory diagnostics to the SLUB allocator
to make debugging OOM conditions easier. This patch helped hunt down a nasty
OOM issue that popped up every now and then and was caused by SLUB debugging
code which forced 4096-byte allocations to use order 1 pages even in the
fallback case.

An example printout looks like this:

<snip page allocator out-of-memory message>
SLUB: Unable to allocate memory on node -1 (gfp=20)
cache: kmalloc-4096, object size: 4096, buffer size: 4168, default order: 3, min order: 1
node 0: slabs: 95, objs: 665, free: 0

Cc: Christoph Lameter <cl@xxxxxxxxxxxxxxxxxxxx>
Acked-by: Mel Gorman <mel@xxxxxxxxx>
Tested-by: Larry Finger <Larry.Finger@xxxxxxxxxxxx>
Signed-off-by: Pekka Enberg <penberg@xxxxxxxxxxxxxx>
---
mm/slub.c | 70 ++++++++++++++++++++++++++++++++++++++++++++----------------
1 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 65ffda5..2bbacfc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1484,6 +1484,56 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
return 1;
}

+static int count_free(struct page *page)
+{
+ return page->objects - page->inuse;
+}
+
+static unsigned long count_partial(struct kmem_cache_node *n,
+ int (*get_count)(struct page *))
+{
+ unsigned long flags;
+ unsigned long x = 0;
+ struct page *page;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, lru)
+ x += get_count(page);
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+}
+
+static noinline void
+slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
+{
+ int node;
+
+ printk(KERN_WARNING
+ "SLUB: Unable to allocate memory on node %d (gfp=%x)\n",
+ nid, gfpflags);
+ printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
+ "default order: %d, min order: %d\n", s->name, s->objsize,
+ s->size, oo_order(s->oo), oo_order(s->min));
+
+ for_each_online_node(node) {
+ struct kmem_cache_node *n = get_node(s, node);
+ unsigned long nr_slabs;
+ unsigned long nr_objs;
+ unsigned long nr_free;
+
+ if (!n)
+ continue;
+
+ nr_slabs = atomic_long_read(&n->nr_slabs);
+ nr_objs = atomic_long_read(&n->total_objects);
+ nr_free = count_partial(n, count_free);
+
+ printk(KERN_WARNING
+ " node %d: slabs: %ld, objs: %ld, free: %ld\n",
+ node, nr_slabs, nr_objs, nr_free);
+ }
+}
+
/*
* Slow path. The lockless freelist is empty or we need to perform
* debugging duties.
@@ -1565,6 +1615,7 @@ new_slab:
c->page = new;
goto load_freelist;
}
+ slab_out_of_memory(s, gfpflags, node);
return NULL;
debug:
if (!alloc_debug_processing(s, c->page, object, addr))
@@ -3318,20 +3369,6 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
}

#ifdef CONFIG_SLUB_DEBUG
-static unsigned long count_partial(struct kmem_cache_node *n,
- int (*get_count)(struct page *))
-{
- unsigned long flags;
- unsigned long x = 0;
- struct page *page;
-
- spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, lru)
- x += get_count(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
- return x;
-}
-
static int count_inuse(struct page *page)
{
return page->inuse;
@@ -3342,11 +3379,6 @@ static int count_total(struct page *page)
return page->objects;
}

-static int count_free(struct page *page)
-{
- return page->objects - page->inuse;
-}
-
static int validate_slab(struct kmem_cache *s, struct page *page,
unsigned long *map)
{
--
1.6.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/