Re: [PATCH 04/10] mm: slub: add knowledge of reserve pages

From: Peter Zijlstra
Date: Mon Aug 20 2007 - 03:39:13 EST


Ok, so I got rid of the global stuff; this also obsoletes 3/10.
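
To make the intent concrete before the patch itself: the slow path tracks
whether the current cpu slab was allocated from the page allocator reserves,
and only hands its objects to contexts that are themselves allowed to bypass
the watermarks (ALLOC_NO_WATERMARKS); everyone else is forced through a fresh
allocation so the watermarks get re-tested. A rough standalone sketch of just
that decision, where may_use_reserves() and must_alloc_new_slab() are
illustrative stand-ins for the gfp_to_alloc_flags() test and the cpu_slab
reserve bit, not kernel API:

/*
 * Illustrative userspace sketch of the reserve check in __slab_alloc();
 * all names here are made up for the example.
 */
#include <stdbool.h>
#include <stdio.h>

/* stand-in for: gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS */
static bool may_use_reserves(bool alloc_no_watermarks)
{
	return alloc_no_watermarks;
}

/*
 * True when the cpu slab is a reserve slab but the caller is not
 * entitled to the reserves, i.e. we must allocate a new slab and
 * thereby re-test the current watermark levels.
 */
static bool must_alloc_new_slab(bool cpu_slab_is_reserve,
				bool alloc_no_watermarks)
{
	return cpu_slab_is_reserve && !may_use_reserves(alloc_no_watermarks);
}

int main(void)
{
	/* ordinary allocation hitting a reserve cpu slab -> force a new slab */
	printf("%d\n", must_alloc_new_slab(true, false));	/* prints 1 */
	/* entitled allocation (e.g. PF_MEMALLOC) -> keep using the reserve slab */
	printf("%d\n", must_alloc_new_slab(true, true));	/* prints 0 */
	return 0;
}

The patch below folds this check into the existing slow path, so the fast
path stays untouched.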


---
Subject: mm: slub: add knowledge of reserve pages

Restrict objects from reserve slabs (ALLOC_NO_WATERMARKS) to allocation
contexts that are entitled to use the reserves.

Care is taken to only touch the SLUB slow path.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Cc: Christoph Lameter <clameter@xxxxxxx>
---
mm/slub.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 69 insertions(+), 18 deletions(-)

Index: linux-2.6-2/mm/slub.c
===================================================================
--- linux-2.6-2.orig/mm/slub.c
+++ linux-2.6-2/mm/slub.c
@@ -20,11 +20,12 @@
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>
+#include "internal.h"

/*
* Lock order:
* 1. slab_lock(page)
- * 2. slab->list_lock
+ * 2. node->list_lock
*
* The slab_lock protects operations on the object of a particular
* slab and its metadata in the page struct. If the slab lock
@@ -1069,7 +1070,7 @@ static void setup_object(struct kmem_cac
s->ctor(object, s, 0);
}

-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *reserve)
{
struct page *page;
struct kmem_cache_node *n;
@@ -1087,6 +1088,7 @@ static struct page *new_slab(struct kmem
if (!page)
goto out;

+ *reserve = page->reserve;
n = get_node(s, page_to_nid(page));
if (n)
atomic_long_inc(&n->nr_slabs);
@@ -1403,12 +1405,36 @@ static inline void flush_slab(struct kme
}

/*
+ * cpu slab reserve magic
+ *
+ * we mark reserve status in the lsb of the ->cpu_slab[] pointer.
+ */
+static inline unsigned long cpu_slab_reserve(struct kmem_cache *s, int cpu)
+{
+ return unlikely((unsigned long)s->cpu_slab[cpu] & 1);
+}
+
+static inline void
+cpu_slab_set(struct kmem_cache *s, int cpu, struct page *page, int reserve)
+{
+ if (unlikely(reserve))
+ page = (struct page *)((unsigned long)page | 1);
+
+ s->cpu_slab[cpu] = page;
+}
+
+static inline struct page *cpu_slab(struct kmem_cache *s, int cpu)
+{
+ return (struct page *)((unsigned long)s->cpu_slab[cpu] & ~1UL);
+}
+
+/*
* Flush cpu slab.
* Called from IPI handler with interrupts disabled.
*/
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
- struct page *page = s->cpu_slab[cpu];
+ struct page *page = cpu_slab(s, cpu);

if (likely(page))
flush_slab(s, page, cpu);
@@ -1457,10 +1483,22 @@ static void *__slab_alloc(struct kmem_ca
{
void **object;
int cpu = smp_processor_id();
+ int reserve = 0;

if (!page)
goto new_slab;

+ if (cpu_slab_reserve(s, cpu)) {
+ /*
+ * If the current slab is a reserve slab and the current
+ * allocation context does not allow access to the reserves
+ * we must force an allocation to test the current levels.
+ */
+ if (!(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
+ goto alloc_slab;
+ reserve = 1;
+ }
+
slab_lock(page);
if (unlikely(node != -1 && page_to_nid(page) != node))
goto another_slab;
@@ -1468,10 +1506,9 @@ load_freelist:
object = page->freelist;
if (unlikely(!object))
goto another_slab;
- if (unlikely(SlabDebug(page)))
+ if (unlikely(SlabDebug(page) || reserve))
goto debug;

- object = page->freelist;
page->lockless_freelist = object[page->offset];
page->inuse = s->objects;
page->freelist = NULL;
@@ -1484,14 +1521,28 @@ another_slab:
new_slab:
page = get_partial(s, gfpflags, node);
if (page) {
- s->cpu_slab[cpu] = page;
+ cpu_slab_set(s, cpu, page, reserve);
goto load_freelist;
}

- page = new_slab(s, gfpflags, node);
+alloc_slab:
+ page = new_slab(s, gfpflags, node, &reserve);
if (page) {
+ struct page *slab;
+
cpu = smp_processor_id();
- if (s->cpu_slab[cpu]) {
+ slab = cpu_slab(s, cpu);
+
+ if (cpu_slab_reserve(s, cpu) && !reserve) {
+ /*
+ * If the current cpu_slab is a reserve slab but we
+ * managed to allocate a new slab the pressure is
+ * lifted and we can unmark the current one.
+ */
+ cpu_slab_set(s, cpu, slab, 0);
+ }
+
+ if (slab) {
/*
* Someone else populated the cpu_slab while we
* enabled interrupts, or we have gotten scheduled
@@ -1499,29 +1550,28 @@ new_slab:
* requested node even if __GFP_THISNODE was
* specified. So we need to recheck.
*/
- if (node == -1 ||
- page_to_nid(s->cpu_slab[cpu]) == node) {
+ if (node == -1 || page_to_nid(slab) == node) {
/*
* Current cpuslab is acceptable and we
* want the current one since its cache hot
*/
discard_slab(s, page);
- page = s->cpu_slab[cpu];
+ page = slab;
slab_lock(page);
goto load_freelist;
}
/* New slab does not fit our expectations */
- flush_slab(s, s->cpu_slab[cpu], cpu);
+ flush_slab(s, slab, cpu);
}
slab_lock(page);
SetSlabFrozen(page);
- s->cpu_slab[cpu] = page;
+ cpu_slab_set(s, cpu, page, reserve);
goto load_freelist;
}
return NULL;
debug:
- object = page->freelist;
- if (!alloc_debug_processing(s, page, object, addr))
+ if (SlabDebug(page) &&
+ !alloc_debug_processing(s, page, object, addr))
goto another_slab;

page->inuse++;
@@ -1548,7 +1598,7 @@ static void __always_inline *slab_alloc(
unsigned long flags;

local_irq_save(flags);
- page = s->cpu_slab[smp_processor_id()];
+ page = cpu_slab(s, smp_processor_id());
if (unlikely(!page || !page->lockless_freelist ||
(node != -1 && page_to_nid(page) != node)))

@@ -1873,10 +1923,11 @@ static struct kmem_cache_node * __init e
{
struct page *page;
struct kmem_cache_node *n;
+ int reserve;

BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));

- page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);
+ page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node, &reserve);

BUG_ON(!page);
n = page->freelist;
@@ -3189,7 +3240,7 @@ static unsigned long slab_objects(struct
per_cpu = nodes + nr_node_ids;

for_each_possible_cpu(cpu) {
- struct page *page = s->cpu_slab[cpu];
+ struct page *page = cpu_slab(s, cpu);
int node;

if (page) {

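For reference, the ->cpu_slab[] lsb trick above relies only on struct page
pointers being at least 2-byte aligned, which leaves bit 0 free to carry the
reserve flag. A minimal userspace sketch of the same tagging scheme
(tag_reserve/is_reserve/untag are illustrative names, not the helpers from
the patch):

/* Minimal sketch of low-bit pointer tagging, as used for ->cpu_slab[]. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct page { int dummy; };	/* placeholder; only the pointer value matters */

static struct page *tag_reserve(struct page *page, int reserve)
{
	if (reserve)
		page = (struct page *)((uintptr_t)page | 1);
	return page;
}

static int is_reserve(const struct page *page)
{
	return (uintptr_t)page & 1;
}

static struct page *untag(struct page *page)
{
	return (struct page *)((uintptr_t)page & ~(uintptr_t)1);
}

int main(void)
{
	struct page p;
	struct page *tagged = tag_reserve(&p, 1);

	assert(is_reserve(tagged));
	assert(untag(tagged) == &p);
	assert(!is_reserve(tag_reserve(&p, 0)));
	printf("tagging round-trips: ok\n");
	return 0;
}

cpu_slab_set()/cpu_slab()/cpu_slab_reserve() in the patch are the same three
operations, just expressed as the slow-path accessors.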