[RFC][PATCH 8/8] Add critical pool support to the slab allocator

From: Matthew Dobson
Date: Fri Nov 18 2005 - 14:47:01 EST


Finally, teach the slab allocator how to deal with critical pages and how
to keep them for use exclusively by __GFP_CRITICAL allocations.

-Matt
Modify the slab allocator to support the addition of a Critical Pool to the VM.
We want to ensure that when a cache is given a new slab page from the Critical
Pool during an emergency, only other __GFP_CRITICAL allocations are satisfied
from that slab.

Signed-off-by: Matthew Dobson <colpatch@xxxxxxxxxx>
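
For illustration, here is a minimal caller-side sketch (not part of the patch,
and assuming the __GFP_CRITICAL flag and critical page pool introduced earlier
in this series).  The critical handling in the slab allocator only kicks in
when the cache has to grow during an emergency; otherwise the same call
behaves like any other atomic allocation:

/*
 * Sketch only: grab_critical_object() is a hypothetical helper, not part
 * of this patch.  During an emergency, the object may be carved out of a
 * dedicated critical slab (on the cache's slabs_crit list) once the normal
 * free lists are exhausted, and it is freed back to that slab as well.
 */
static void *grab_critical_object(kmem_cache_t *cachep)
{
	/* GFP_ATOMIC, since emergency allocations may run in atomic context */
	return kmem_cache_alloc(cachep, GFP_ATOMIC | __GFP_CRITICAL);
}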

Index: linux-2.6.15-rc1+critical_pool/mm/slab.c
===================================================================
--- linux-2.6.15-rc1+critical_pool.orig/mm/slab.c 2005-11-17 16:51:22.965173864 -0800
+++ linux-2.6.15-rc1+critical_pool/mm/slab.c 2005-11-17 17:22:03.056437472 -0800
@@ -220,6 +220,7 @@ struct slab {
unsigned long colouroff;
void *s_mem; /* including colour offset */
unsigned int inuse; /* # of objs active in slab */
+ unsigned short critical; /* is this a critical slab? */
kmem_bufctl_t free;
unsigned short nid; /* node number slab is on */
};
@@ -395,6 +396,9 @@ struct kmem_cache {
unsigned int slab_size;
unsigned int dflags; /* dynamic flags */

+ /* list of critical slabs for this cache */
+ struct list_head slabs_crit;
+
/* constructor func */
void (*ctor)(void *, kmem_cache_t *, unsigned long);

@@ -1770,6 +1774,7 @@ kmem_cache_t *kmem_cache_create(const ch
cachep->gfpflags |= GFP_DMA;
spin_lock_init(&cachep->spinlock);
cachep->objsize = size;
+ INIT_LIST_HEAD(&cachep->slabs_crit);

if (flags & CFLGS_OFF_SLAB)
cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
@@ -2090,6 +2095,7 @@ static struct slab *alloc_slabmgmt(kmem_
slabp->inuse = 0;
slabp->colouroff = colour_off;
slabp->s_mem = objp + colour_off;
+ slabp->critical = 0;

return slabp;
}
@@ -2182,7 +2188,8 @@ static void return_object(kmem_cache_t *

#if DEBUG
/* Verify that the slab belongs to the intended node */
- WARN_ON(slabp->nid != nid);
+ if (nid >= 0)
+ WARN_ON(slabp->nid != nid);

if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
printk(KERN_ERR "slab: double free detected in cache "
@@ -2341,6 +2348,24 @@ bad:
#define check_slabp(x,y) do { } while(0)
#endif

+static inline struct slab *get_critical_slab(kmem_cache_t *cachep, gfp_t flags)
+{
+ struct slab *slabp = NULL;
+
+ spin_lock(&cachep->spinlock);
+ /* search for any partially free critical slabs */
+ if (!list_empty(&cachep->slabs_crit)) {
+ list_for_each_entry(slabp, &cachep->slabs_crit, list)
+ if (slabp->free != BUFCTL_END)
+ goto found;
+ slabp = NULL;
+ }
+found:
+ spin_unlock(&cachep->spinlock);
+
+ return slabp;
+}
+
/**
* Helper function for cache_grow(). Handle cache coloring, allocating a
* struct slab and initializing the slab.
@@ -2396,10 +2421,11 @@ out:
* Grow (by 1) the number of slabs within a cache. This is called by
* kmem_cache_alloc() when there are no active objs left in a cache.
*/
-static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nid)
+static void *cache_grow(kmem_cache_t *cachep, gfp_t flags, int nid)
{
struct slab *slabp = NULL;
void *objp = NULL;
+ int critical = is_emergency_alloc(flags);

/*
* Be lazy and only check for valid flags here,
@@ -2411,6 +2437,13 @@ static int cache_grow(kmem_cache_t *cach
goto out;

/*
+ * If we are in an emergency situation and this is a 'critical' alloc,
+ * check whether we've got an existing critical slab first.
+ */
+ if (critical && (slabp = get_critical_slab(cachep, flags)))
+ goto got_critical_slab;
+
+ /*
* Ensure caller isn't asking for DMA memory if the slab wasn't created
* with the SLAB_DMA flag.
* Also ensure the caller *is* asking for DMA memory if the slab was
@@ -2431,13 +2464,34 @@ static int cache_grow(kmem_cache_t *cach

STATS_INC_GROWN(cachep);
/* Make slab active. */
- spin_lock(&l3->list_lock);
- list_add_tail(&slabp->list, &l3->slabs_free);
- l3->free_objects += cachep->num;
- spin_unlock(&l3->list_lock);
+ if (!critical) {
+ spin_lock(&l3->list_lock);
+ list_add_tail(&slabp->list, &l3->slabs_free);
+ l3->free_objects += cachep->num;
+ spin_unlock(&l3->list_lock);
+ } else {
+ spin_lock(&cachep->spinlock);
+ list_add_tail(&slabp->list, &cachep->slabs_crit);
+ slabp->critical = 1;
+ spin_unlock(&cachep->spinlock);
+got_critical_slab:
+ objp = get_object(cachep, slabp, nid);
+ check_slabp(cachep, slabp);
+ }
}
out:
- return objp != NULL;
+ return objp;
+}
+
+static inline int is_critical_object(void *obj)
+{
+ struct slab *slabp;
+
+ if (!obj)
+ return 0;
+
+ slabp = GET_PAGE_SLAB(virt_to_page(obj));
+ return slabp->critical;
}

static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags)
@@ -2516,12 +2570,15 @@ alloc_done:
spin_unlock(&l3->list_lock);

if (unlikely(!ac->avail)) {
- int x;
- x = cache_grow(cachep, flags, numa_node_id());
+ void *obj = cache_grow(cachep, flags, numa_node_id());
+
+ /* critical objects don't "grow" the slab, just return 'obj' */
+ if (is_critical_object(obj))
+ return obj;

/* cache_grow can reenable interrupts, then ac could change. */
ac = ac_data(cachep);
- if (!x && ac->avail == 0) /* no objects in sight? abort */
+ if (!obj && ac->avail == 0) /* No objects in sight? Abort. */
return NULL;

if (!ac->avail) /* objects refilled by interrupt? */
@@ -2633,7 +2690,6 @@ static void *__cache_alloc_node(kmem_cac
struct slab *slabp;
struct kmem_list3 *l3;
void *obj;
- int x;

l3 = cachep->nodelists[nid];
BUG_ON(!l3);
@@ -2675,11 +2731,15 @@ retry:

must_grow:
spin_unlock(&l3->list_lock);
- x = cache_grow(cachep, flags, nid);
+ obj = cache_grow(cachep, flags, nid);

- if (!x)
+ if (!obj)
return NULL;

+ /* critical objects don't "grow" the slab, just return 'obj' */
+ if (is_critical_object(obj))
+ goto done;
+
goto retry;
done:
return obj;
@@ -2780,6 +2840,22 @@ free_done:
sizeof(void *) * ac->avail);
}

+static inline void free_critical_object(kmem_cache_t *cachep, void *objp)
+{
+ struct slab *slabp = GET_PAGE_SLAB(virt_to_page(objp));
+
+ check_slabp(cachep, slabp);
+ return_object(cachep, slabp, objp, -1);
+ check_slabp(cachep, slabp);
+
+ if (slabp->inuse == 0) {
+ BUG_ON(cachep->flags & SLAB_DESTROY_BY_RCU);
+ BUG_ON(cachep->gfporder);
+
+ list_del(&slabp->list);
+ slab_destroy(cachep, slabp);
+ }
+}

/**
* __cache_free
@@ -2795,6 +2871,11 @@ static inline void __cache_free(kmem_cac
check_irq_off();
objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));

+ if (is_critical_object(objp)) {
+ free_critical_object(cachep, objp);
+ return;
+ }
+
/*
* Make sure we are not freeing a object from another
* node to the array cache on this cpu.