[MODSLAB 4/4] Slabulator: Emulate the existing Slab Layer

From: Christoph Lameter
Date: Sat Aug 26 2006 - 22:31:29 EST

Next message: Adrian Bunk: "2.6.18-rc4-mm3: NF_CONNTRACK_FTP=y compile error"
Previous message: Christoph Lameter: "[MODSLAB 3/4] A Kmalloc subsystem"
In reply to: Christoph Lameter: "[MODSLAB 3/4] A Kmalloc subsystem"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

The slab emulation layer.

This provides a layer that implements the existing slab API.
We try to keep the definitions that we copy from slab.h
to an absolute minimum. If things break then more
(useless) definitions from slab.h may be needed.

We put a hook into slab.h to redirect includes for slab.h to
slabulator.h.

The slabulator also contains the slab reaper since it is
used by the page allocator. However, the slabifier does not
need any of this since it is not per cpu cache based. The slabifier
does never reap active slabs in single processor configuration.
For SMP and NUMA a slab specific reaper is used.

Signed-off-by: Christoph Lameter <clameter@xxxxxxx>

Index: linux-2.6.18-rc4-mm3/mm/slabulator.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.18-rc4-mm3/mm/slabulator.c 2006-08-26 18:27:52.666216263 -0700
@@ -0,0 +1,287 @@
+/*
+ * Slabulator = Emulate the Slab API.
+ *
+ * (C) 2006 Silicon Graphics, Inc. Christoph Lameter <clameter@xxxxxxx>
+ *
+ */
+#include <linux/mm.h>
+#include <linux/kmalloc.h>
+#include <linux/module.h>
+#include <linux/allocator.h>
+#include <linux/bitops.h>
+#include <linux/slabulator.h>
+#include <linux/slabstat.h>
+
+#define SLAB_MAX_ORDER 4
+
+#define SLABULATOR_MERGE
+
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN sizeof(void *)
+#endif
+
+static int calculate_order(int size)
+{
+ int order;
+ int rem;
+
+ for(order = max(0, fls(size - 1) - PAGE_SHIFT);
+ order < MAX_ORDER; order++) {
+ unsigned long slab_size = PAGE_SIZE << order;
+
+ if (slab_size < size)
+ continue;
+
+ rem = slab_size % size;
+
+ if (rem * 8 <= PAGE_SIZE << order)
+ break;
+
+ }
+ if (order >= MAX_ORDER)
+ return -E2BIG;
+ return order;
+}
+
+/*
+ * We can actually operate slabs any time after the page allocator is up.
+ * slab_is_available() merely means that the kmalloc array is available.
+ *
+ * However, be aware that deriving allocators depends on kmalloc being
+ * functional.
+ */
+int slabulator_up = 0;
+
+int slab_is_available(void) {
+ return slabulator_up;
+}
+
+void kmem_cache_init(void)
+{
+ extern void kmalloc_init(void);
+
+ kmalloc_init();
+ slabulator_up = 1;
+}
+
+struct slab_cache *kmem_cache_create(const char *name, size_t size,
+ size_t align, unsigned long flags,
+ void (*ctor)(void *, struct slab_cache *, unsigned long),
+ void (*dtor)(void *, struct slab_cache *, unsigned long))
+{
+ const struct page_allocator *a;
+ struct slab_cache s;
+ struct slab_cache *rs;
+ struct slab_control *x;
+ int page_size_slab;
+
+ s.offset = 0;
+ s.align = max(ARCH_SLAB_MINALIGN, ALIGN(align, sizeof(void *)));
+
+ if (flags & (SLAB_MUST_HWCACHE_ALIGN|SLAB_HWCACHE_ALIGN))
+ s.align = L1_CACHE_BYTES;
+
+ s.inuse = size;
+ s.objsize = size;
+ s.size = ALIGN(size, s.align);
+
+ /* Pick the right allocator for our purposes */
+ if (flags & SLAB_RECLAIM_ACCOUNT)
+ a = reclaimable_allocator;
+ else
+ a = unreclaimable_allocator;
+
+ if (flags & SLAB_CACHE_DMA)
+ a = dmaify_page_allocator(a);
+
+ if (flags & SLAB_DESTROY_BY_RCU)
+ a = rcuify_page_allocator(a);
+
+ page_size_slab = (PAGE_SIZE << calculate_order(s.size)) > (s.size << 1);
+
+ if (page_size_slab && ((flags & SLAB_DESTROY_BY_RCU) || ctor || dtor)) {
+ /*
+ * For RCU processing and constructors / destructors:
+ * The object must remain intact even if it is free.
+ * The free pointer would hurt us there.
+ * Relocate the free object pointer out of
+ * the space used by the object.
+ *
+ * Slabs with a single object do not need this since
+ * those do not have to deal with free pointers.
+ */
+ s.offset = s.size - sizeof(void *);
+ if (s.offset < s.objsize) {
+ /*
+ * Would overlap the object. We need to waste some
+ * more space to make the object RCU safe
+ */
+ s.offset = s.size;
+ s.size += s.align;
+ }
+ s.inuse = s.size;
+ }
+
+ s.order = calculate_order(s.size);
+
+ if (s.order < 0)
+ goto error;
+
+ s.name = name;
+ s.node = -1;
+
+ x = kmalloc(sizeof(struct slab_control), GFP_KERNEL);
+
+ if (!x)
+ return NULL;
+ s.page_alloc = a;
+ s.slab_alloc = &SLABULATOR_ALLOCATOR;
+#ifdef SLABULATOR_MERGE
+ /*
+ * This works but is this really something we want?
+ */
+ if (((s.size & (s.size - 1))==0) && !ctor && !dtor &&
+ !(flags & (SLAB_DESTROY_BY_RCU|SLAB_RECLAIM_ACCOUNT))) {
+
+ printk(KERN_INFO "Merging slab_cache %s size %d into"
+ " kmalloc array\n", name, s.size);
+ rs = kmalloc_slab_allocator.create(x, &s);
+ kfree(x);
+ x = NULL;
+ } else
+#endif
+ rs = SLABULATOR_ALLOCATOR.create(x, &s);
+ if (!rs)
+ goto error;
+
+ /*
+ * Now deal with constuctors and destructors. We need to know the
+ * slab_cache address in order to be able to pass the slab_cache
+ * address down the chain.
+ */
+ if (ctor || dtor)
+ rs->page_alloc =
+ ctor_and_dtor_for_page_allocator(rs->page_alloc,
+ rs->size, rs,
+ (void *)ctor, (void *)dtor);
+
+ if (x)
+ register_slab(rs);
+ return rs;
+
+error:
+ a->destructor((struct page_allocator *)a);
+ if (flags & SLAB_PANIC)
+ panic("Cannot create slab %s size=%ld realsize=%d "
+ "order=%d offset=%d flags=%lx\n",
+ s.name, size, s.size, s.order, s.offset, flags);
+
+
+ return NULL;
+}
+EXPORT_SYMBOL(kmem_cache_create);
+
+int kmem_cache_destroy(struct slab_cache *s)
+{
+ SLABULATOR_ALLOCATOR.destroy(s);
+ unregister_slab(s);
+ kfree(s);
+ return 0;
+}
+EXPORT_SYMBOL(kmem_cache_destroy);
+
+void *kmem_cache_zalloc(struct slab_cache *s, gfp_t flags)
+{
+ void *x;
+
+ x = kmem_cache_alloc(s, flags);
+ if (x)
+ memset(x, 0, s->objsize);
+ return x;
+}
+
+/*
+ * Generic reaper (the slabifier has its own way of reaping)
+ */
+#ifdef CONFIG_NUMA
+/*
+ * Special reaping functions for NUMA systems called from cache_reap().
+ */
+static DEFINE_PER_CPU(unsigned long, reap_node);
+
+static void init_reap_node(int cpu)
+{
+ int node;
+
+ node = next_node(cpu_to_node(cpu), node_online_map);
+ if (node == MAX_NUMNODES)
+ node = first_node(node_online_map);
+
+ __get_cpu_var(reap_node) = node;
+}
+
+static void next_reap_node(void)
+{
+ int node = __get_cpu_var(reap_node);
+
+ /*
+ * Also drain per cpu pages on remote zones
+ */
+ if (node != numa_node_id())
+ drain_node_pages(node);
+
+ node = next_node(node, node_online_map);
+ if (unlikely(node >= MAX_NUMNODES))
+ node = first_node(node_online_map);
+ __get_cpu_var(reap_node) = node;
+}
+#else
+#define init_reap_node(cpu) do { } while (0)
+#define next_reap_node(void) do { } while (0)
+#endif
+
+#define REAPTIMEOUT_CPUC (2*HZ)
+
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct work_struct, reap_work);
+
+static void cache_reap(void *unused)
+{
+ next_reap_node();
+ refresh_cpu_vm_stats(smp_processor_id());
+
+ schedule_delayed_work(&__get_cpu_var(reap_work),
+ REAPTIMEOUT_CPUC);
+}
+
+static void __devinit start_cpu_timer(int cpu)
+{
+ struct work_struct *reap_work = &per_cpu(reap_work, cpu);
+
+ /*
+ * When this gets called from do_initcalls via cpucache_init(),
+ * init_workqueues() has already run, so keventd will be setup
+ * at that time.
+ */
+ if (keventd_up() && reap_work->func == NULL) {
+ init_reap_node(cpu);
+ INIT_WORK(reap_work, cache_reap, NULL);
+ schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
+ }
+}
+
+static int __init cpucache_init(void)
+{
+ int cpu;
+
+ /*
+ * Register the timers that drain pcp pages and update vm statistics
+ */
+ for_each_online_cpu(cpu)
+ start_cpu_timer(cpu);
+ return 0;
+}
+__initcall(cpucache_init);
+#endif
+
+
Index: linux-2.6.18-rc4-mm3/include/linux/slabulator.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.18-rc4-mm3/include/linux/slabulator.h 2006-08-26 18:27:12.869848074 -0700
@@ -0,0 +1,123 @@
+#ifndef _LINUX_SLABULATOR_H
+#define _LINUX_SLABULATOR_H
+/*
+ * Slabulator: Emulate the existing Slab API.
+ *
+ * (C) 2006 Silicon Graphics, Inc.
+ * Christoph Lameter <clameter@xxxxxxx>
+ */
+
+#include <linux/allocator.h>
+#include <linux/kmalloc.h>
+
+#define kmem_cache_t struct slab_cache
+#define kmem_cache slab_cache
+
+#ifndef SLABULATOR_ALLOCATOR
+#define SLABULATOR_ALLOCATOR slabifier_allocator
+#endif
+
+/*
+ * We really should be getting rid of these. This is only
+ * a select list/
+ */
+#define SLAB_KERNEL GFP_KERNEL
+#define SLAB_ATOMIC GFP_ATOMIC
+#define SLAB_NOFS GFP_NOFS
+#define SLAB_NOIO GFP_NOIO
+
+/* No debug features for now */
+#define SLAB_HWCACHE_ALIGN 0x00002000UL
+#define SLAB_CACHE_DMA 0x00004000UL
+#define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL
+#define SLAB_RECLAIM_ACCOUNT 0x00020000UL
+#define SLAB_PANIC 0x00040000UL
+#define SLAB_DESTROY_BY_RCU 0x00080000UL
+#define SLAB_MEM_SPREAD 0x00100000UL
+
+/* flags passed to a constructor func */
+#define SLAB_CTOR_CONSTRUCTOR 0x001UL
+#define SLAB_CTOR_ATOMIC 0x002UL
+#define SLAB_CTOR_VERIFY 0x004UL
+
+/*
+ * slab_allocators are always available after the page allocator
+ * has been brought up. kmem_cache_init creates the kmalloc array:
+ */
+extern int slab_is_available(void);
+extern void kmem_cache_init(void);
+
+/* System wide caches (Should these be really here?) */
+extern struct slab_cache *vm_area_cachep;
+extern struct slab_cache *names_cachep;
+extern struct slab_cache *files_cachep;
+extern struct slab_cache *filp_cachep;
+extern struct slab_cache *fs_cachep;
+extern struct slab_cache *sighand_cachep;
+extern struct slab_cache *bio_cachep;
+
+extern struct slab_cache *kmem_cache_create(const char *name, size_t size,
+ size_t align, unsigned long flags,
+ void (*ctor)(void *, struct slab_cache *, unsigned long),
+ void (*dtor)(void *, struct slab_cache *, unsigned long));
+
+static inline unsigned int kmem_cache_size(struct slab_cache *s)
+{
+ return s->objsize;
+}
+
+static inline const char *kmem_cache_name(struct slab_cache *s)
+{
+ return s->name;
+}
+
+static inline void *kmem_cache_alloc(struct slab_cache *s, gfp_t flags)
+{
+ return SLABULATOR_ALLOCATOR.alloc(s, flags);
+}
+
+static inline void *kmem_cache_alloc_node(struct slab_cache *s,
+ gfp_t flags, int node)
+{
+ return SLABULATOR_ALLOCATOR.alloc_node(s, flags, node);
+}
+
+extern void *kmem_cache_zalloc(struct slab_cache *s, gfp_t flags);
+
+static inline void kmem_cache_free(struct slab_cache *s, const void *x)
+{
+ SLABULATOR_ALLOCATOR.free(s, x);
+}
+
+static inline int kmem_ptr_validate(struct slab_cache *s, void *x)
+{
+ return SLABULATOR_ALLOCATOR.valid_pointer(s, x);
+}
+
+extern int kmem_cache_destroy(struct slab_cache *s);
+
+static inline int kmem_cache_shrink(struct slab_cache *s)
+{
+ return SLABULATOR_ALLOCATOR.shrink(s, NULL);
+}
+
+/**
+ * kcalloc - allocate memory for an array. The memory is set to zero.
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate.
+ */
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+{
+ if (n != 0 && size > ULONG_MAX / n)
+ return NULL;
+ return kzalloc(n * size, flags);
+}
+
+/* No current shrink statistics */
+struct shrinker;
+static inline void kmem_set_shrinker(kmem_cache_t *cachep,
+ struct shrinker *shrinker)
+{}
+#endif /* _LINUX_SLABULATOR_H */
+
Index: linux-2.6.18-rc4-mm3/mm/Makefile
===================================================================
--- linux-2.6.18-rc4-mm3.orig/mm/Makefile 2006-08-26 18:27:10.289929422 -0700
+++ linux-2.6.18-rc4-mm3/mm/Makefile 2006-08-26 18:27:12.870824576 -0700
@@ -25,4 +25,5 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += memory_h
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
-obj-$(CONFIG_MODULAR_SLAB) += allocator.o slabifier.o slabstat.o kmalloc.o
+obj-$(CONFIG_MODULAR_SLAB) += allocator.o slabifier.o slabstat.o \
+ kmalloc.o slabulator.o
Index: linux-2.6.18-rc4-mm3/init/Kconfig
===================================================================
--- linux-2.6.18-rc4-mm3.orig/init/Kconfig 2006-08-26 16:38:04.676887088 -0700
+++ linux-2.6.18-rc4-mm3/init/Kconfig 2006-08-26 18:27:12.871801078 -0700
@@ -332,6 +332,26 @@ config CC_OPTIMIZE_FOR_SIZE

If unsure, say N.

+config SLAB
+ default y
+ bool "Traditional SLAB allocator"
+ help
+ Disabling this allows the use of alternate slab allocators
+ with less overhead such as SLOB (very simple) or the
+ use the slabifier with the module allocator framework.
+ Note that alternate slab allocators may not provide
+ the complete functionality for slab.
+
+config MODULAR_SLAB
+ default y
+ bool "Use the modular allocator framework"
+ depends on EXPERIMENTAL && !SLAB
+ help
+ The modular allocator framework allows the flexible use
+ of different slab allocators and page allocators for memory
+ allocation. This will completely replace the existing
+ slab allocator. Beware this is experimental code.
+
menuconfig EMBEDDED
bool "Configure standard kernel features (for small systems)"
help
@@ -370,7 +390,6 @@ config KALLSYMS_EXTRA_PASS
reported. KALLSYMS_EXTRA_PASS is only a temporary workaround while
you wait for kallsyms to be fixed.

-
config HOTPLUG
bool "Support for hot-pluggable devices" if EMBEDDED
default y
@@ -445,15 +464,6 @@ config SHMEM
option replaces shmem and tmpfs with the much simpler ramfs code,
which may be appropriate on small systems without swap.

-config SLAB
- default y
- bool "Use full SLAB allocator" if EMBEDDED
- help
- Disabling this replaces the advanced SLAB allocator and
- kmalloc support with the drastically simpler SLOB allocator.
- SLOB is more space efficient but does not scale well and is
- more susceptible to fragmentation.
-
config VM_EVENT_COUNTERS
default y
bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
@@ -475,7 +485,7 @@ config BASE_SMALL
default 1 if !BASE_FULL

config SLOB
- default !SLAB
+ default !SLAB && !MODULAR_SLAB
bool

menu "Loadable module support"
Index: linux-2.6.18-rc4-mm3/include/linux/slab.h
===================================================================
--- linux-2.6.18-rc4-mm3.orig/include/linux/slab.h 2006-08-26 16:38:04.426902539 -0700
+++ linux-2.6.18-rc4-mm3/include/linux/slab.h 2006-08-26 18:27:12.871801078 -0700
@@ -9,6 +9,10 @@

#if defined(__KERNEL__)

+#ifdef CONFIG_MODULAR_SLAB
+#include <linux/slabulator.h>
+#else
+
typedef struct kmem_cache kmem_cache_t;

#include <linux/gfp.h>
@@ -291,6 +295,8 @@ extern kmem_cache_t *fs_cachep;
extern kmem_cache_t *sighand_cachep;
extern kmem_cache_t *bio_cachep;

+#endif /* CONFIG_SLABULATOR */
+
#endif /* __KERNEL__ */

#endif /* _LINUX_SLAB_H */
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Adrian Bunk: "2.6.18-rc4-mm3: NF_CONNTRACK_FTP=y compile error"
Previous message: Christoph Lameter: "[MODSLAB 3/4] A Kmalloc subsystem"
In reply to: Christoph Lameter: "[MODSLAB 3/4] A Kmalloc subsystem"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]