[RFC, PATCH] SLAB : [NUMA] keep nodeid in struct page instead of struct slab

From: Eric Dumazet
Date: Wed Mar 21 2007 - 05:22:46 EST


In order to avoid a cache miss in kmem_cache_free() on NUMA and to shorten the hot path, we can exploit the following common facts:

1) MAX_NUMNODES <= 64

2) 'struct kmem_cache' objects can be aligned to >= 64 bytes, so the low 6 bits of a 'struct kmem_cache *' value are always zero

The following patch changes page->lru.next so that it contains not only the 'struct kmem_cache *' pointer, but also the nodeid in the low order bits.

This also reduces sizeof(struct slab) by 8 bytes on 64-bit arches, and shrinks struct slab on all platforms (UP or SMP) as well, since the nodeid field is now compiled out whenever it is not needed.
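
For reference, here is a minimal standalone sketch (not part of the patch) of the packing scheme: since 'struct kmem_cache' descriptors are aligned to at least 64 bytes, the low 6 bits of their address are zero and can carry a nodeid < 64, which is what page_set_cache_slab_nodeid()/page_get_cache()/virt_to_nodeid() rely on below. The names used here (kmem_cache_demo, pack_cache_nodeid, ...) are illustrative only; the example builds in userspace with gcc:

/*
 * Userspace illustration of the pointer packing used by the patch.
 * A 64-byte-aligned descriptor leaves 6 low bits free for a nodeid.
 */
#include <assert.h>
#include <stdio.h>

#define NODEID_MASK 63UL		/* MAX_NUMNODES <= 64 -> 6 low bits */

/* stands in for 'struct kmem_cache'; mirrors ALIGN(..., max(64, cache_line_size())) */
struct kmem_cache_demo {
	char pad[128];
} __attribute__((aligned(64)));

static inline unsigned long pack_cache_nodeid(struct kmem_cache_demo *cache, int nodeid)
{
	/* the low 6 bits of 'cache' are zero, so '+ nodeid' is the same as '| nodeid' */
	return (unsigned long)cache + nodeid;
}

static inline struct kmem_cache_demo *unpack_cache(unsigned long v)
{
	return (struct kmem_cache_demo *)(v & ~NODEID_MASK);
}

static inline int unpack_nodeid(unsigned long v)
{
	return (int)(v & NODEID_MASK);
}

int main(void)
{
	static struct kmem_cache_demo cache;
	unsigned long v = pack_cache_nodeid(&cache, 3);

	assert(unpack_cache(v) == &cache);
	assert(unpack_nodeid(v) == 3);
	printf("cache=%p nodeid=%d\n", (void *)unpack_cache(v), unpack_nodeid(v));
	return 0;
}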

Signed-off-by: Eric Dumazet <dada1@xxxxxxxxxxxxx>

diff --git a/mm/slab.c b/mm/slab.c
index abf46ae..d2f7299 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -210,6 +210,16 @@ #define BUFCTL_FREE (((kmem_bufctl_t)(~0
#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)

+#ifdef CONFIG_NUMA
+#if MAX_NUMNODES <= 64
+/* we can use low order bits of page->lru.next to store nodeids */
+# define KEEP_NODEID_IN_PAGE
+#else
+/* too many nodes, we need a field in 'struct slab' */
+# define KEEP_NODEID_IN_SLAB
+#endif
+#endif
+
/*
* struct slab
*
@@ -223,7 +233,9 @@ struct slab {
void *s_mem; /* including colour offset */
unsigned int inuse; /* num of objs active in slab */
kmem_bufctl_t free;
+#ifdef KEEP_NODEID_IN_SLAB
unsigned short nodeid;
+#endif
};

/*
@@ -585,9 +597,18 @@ static int slab_break_gfp_order = BREAK_
* allocator. These are used to find the slab an obj belongs to. With kfree(),
* these are used to find the cache which an obj belongs to.
*/
-static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
+static inline void page_set_cache_slab_nodeid(struct page *page,
+ struct kmem_cache *cache, struct slab *slab, int nodeid)
{
+ page->lru.prev = (struct list_head *)slab;
+#ifdef KEEP_NODEID_IN_PAGE
+ page->lru.next = (struct list_head *)((long)cache + nodeid);
+#else
page->lru.next = (struct list_head *)cache;
+#endif
+#ifdef KEEP_NODEID_IN_SLAB
+ slab->nodeid = nodeid;
+#endif
}

static inline struct kmem_cache *page_get_cache(struct page *page)
@@ -595,12 +616,11 @@ static inline struct kmem_cache *page_ge
if (unlikely(PageCompound(page)))
page = (struct page *)page_private(page);
BUG_ON(!PageSlab(page));
+#ifdef KEEP_NODEID_IN_PAGE
+ return (struct kmem_cache *)((long)page->lru.next & ~63);
+#else
return (struct kmem_cache *)page->lru.next;
-}
-
-static inline void page_set_slab(struct page *page, struct slab *slab)
-{
- page->lru.prev = (struct list_head *)slab;
+#endif
}

static inline struct slab *page_get_slab(struct page *page)
@@ -617,6 +637,18 @@ static inline struct kmem_cache *virt_to
return page_get_cache(page);
}

+#ifdef CONFIG_NUMA
+static inline int virt_to_nodeid(const void *obj)
+{
+ struct page *page = virt_to_page(obj);
+#ifdef KEEP_NODEID_IN_SLAB
+ return page_get_slab(page)->nodeid;
+#else
+ return (long)page->lru.next & 63;
+#endif
+}
+#endif
+
static inline struct slab *virt_to_slab(const void *obj)
{
struct page *page = virt_to_page(obj);
@@ -1134,8 +1166,7 @@ static void drain_alien_cache(struct kme

static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
- struct slab *slabp = virt_to_slab(objp);
- int nodeid = slabp->nodeid;
+ int nodeid = virt_to_nodeid(objp);
struct kmem_list3 *l3;
struct array_cache *alien = NULL;
int node;
@@ -1146,7 +1177,7 @@ static inline int cache_free_alien(struc
* Make sure we are not freeing a object from another node to the array
* cache on this cpu.
*/
- if (likely(slabp->nodeid == node) || unlikely(!use_alien_caches))
+ if (likely(nodeid == node) || unlikely(!use_alien_caches))
return 0;

l3 = cachep->nodelists[node];
@@ -1437,8 +1468,14 @@ void __init kmem_cache_init(void)
cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];

+#ifdef KEEP_NODEID_IN_PAGE
+ /* kmem_cache addresses must be multiple of 64 */
+ cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
+ max(64, cache_line_size()));
+#else
cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
cache_line_size());
+#endif
cache_cache.reciprocal_buffer_size =
reciprocal_value(cache_cache.buffer_size);

@@ -2588,7 +2625,6 @@ static struct slab *alloc_slabmgmt(struc
slabp->inuse = 0;
slabp->colouroff = colour_off;
slabp->s_mem = objp + colour_off;
- slabp->nodeid = nodeid;
return slabp;
}

@@ -2699,7 +2735,7 @@ #endif
* virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.
*/
static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
- void *addr)
+ void *addr, int nodeid)
{
int nr_pages;
struct page *page;
@@ -2711,8 +2747,7 @@ static void slab_map_pages(struct kmem_c
nr_pages <<= cache->gfporder;

do {
- page_set_cache(page, cache);
- page_set_slab(page, slab);
+ page_set_cache_slab_nodeid(page, cache, slab, nodeid);
page++;
} while (--nr_pages);
}
@@ -2787,8 +2822,7 @@ static int cache_grow(struct kmem_cache
if (!slabp)
goto opps1;

- slabp->nodeid = nodeid;
- slab_map_pages(cachep, slabp, objp);
+ slab_map_pages(cachep, slabp, objp, nodeid);

cache_init_objs(cachep, slabp, ctor_flags);
