[PATCH 4/5] mm: slob allocation fairness

From: Peter Zijlstra
Date: Mon May 14 2007 - 09:38:30 EST


The slob allocator has some unfairness with respect to gfp flags: when the slob
space is grown, the gfp flags are used to allocate more memory; however, when
slob space is already available, the gfp flags are ignored.

Thus it is possible for less critical slob allocations to succeed and gobble
up precious memory when under memory pressure.

This patch solves that by using the newly introduced page allocation rank.

Page allocation rank is a scalar quantity connecting ALLOC_ and gfp flags which
represents how deep we had to reach into our reserves when allocating a page.
Rank 0 is the deepest we can reach (ALLOC_NO_WATERMARKS) and 16 is the most
shallow allocation possible (ALLOC_WMARK_HIGH).

When the slob space is grown, the rank of the page allocation is stored. For
each slob allocation we test the given gfp flags against this rank, thereby
asking the question: would these flags have allowed the slob to grow?

If not, we need to test the current situation. This is done by forcing the
growth of the slob space. (Just testing the free page limits will not work due
to direct reclaim.) Failing this, we need to fail the slob allocation.

Thus, if we grew the slob under great duress while PF_MEMALLOC was set and we
really did access the memalloc reserve, the rank would be set to 0. If the next
allocation to that slob were GFP_NOFS|__GFP_NOMEMALLOC (which ordinarily
maps to rank 4, and is always > 0), we'd want to make sure that memory pressure
has decreased enough to allow an allocation with the given gfp flags.

So in this case we try to force-grow the slob space, and on failure we fail the
slob allocation, thus preserving the available slob space for more pressing
allocations.

[netperf results]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Cc: Matt Mackall <mpm@xxxxxxxxxxx>
---
mm/Kconfig | 1 -
mm/slob.c | 25 ++++++++++++++++++++++---
2 files changed, 22 insertions(+), 4 deletions(-)

Index: linux-2.6-git/mm/slob.c
===================================================================
--- linux-2.6-git.orig/mm/slob.c
+++ linux-2.6-git/mm/slob.c
@@ -35,6 +35,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/timer.h>
+#include "internal.h"

struct slob_block {
int units;
@@ -53,6 +54,7 @@ struct bigblock {
};
typedef struct bigblock bigblock_t;

+static struct { int rank; } slobrank = { .rank = MAX_ALLOC_RANK };
static slob_t arena = { .next = &arena, .units = 1 };
static slob_t *slobfree = &arena;
static bigblock_t *bigblocks;
@@ -62,12 +64,29 @@ static DEFINE_SPINLOCK(block_lock);
static void slob_free(void *b, int size);
static void slob_timer_cbk(void);

+static unsigned long slob_get_free_pages(gfp_t gfp, int order)
+{
+ struct page *page = alloc_pages(gfp, order);
+ if (!page)
+ return 0;
+ slobrank.rank = page->index; /* remember the rank of this page allocation */
+ return (unsigned long)page_address(page);
+}

static void *slob_alloc(size_t size, gfp_t gfp, int align)
{
slob_t *prev, *cur, *aligned = 0;
int delta = 0, units = SLOB_UNITS(size);
unsigned long flags;
+ int rank = slab_alloc_rank(gfp);
+
+ if (slab_insufficient_rank(&slobrank, rank)) {
+ struct page *page = alloc_page(gfp);
+ if (!page)
+ return NULL;
+ slobrank.rank = page->index;
+ __free_page(page);
+ }

spin_lock_irqsave(&slob_lock, flags);
prev = slobfree;
@@ -105,7 +124,7 @@ static void *slob_alloc(size_t size, gfp
if (size == PAGE_SIZE) /* trying to shrink arena? */
return 0;

- cur = (slob_t *)__get_free_page(gfp);
+ cur = (slob_t *)slob_get_free_pages(gfp, 0);
if (!cur)
return 0;

@@ -166,7 +185,7 @@ void *__kmalloc(size_t size, gfp_t gfp)
return 0;

bb->order = get_order(size);
- bb->pages = (void *)__get_free_pages(gfp, bb->order);
+ bb->pages = (void *)slob_get_free_pages(gfp, bb->order);

if (bb->pages) {
spin_lock_irqsave(&block_lock, flags);
@@ -309,7 +328,7 @@ void *kmem_cache_alloc(struct kmem_cache
if (c->size < PAGE_SIZE)
b = slob_alloc(c->size, flags, c->align);
else
- b = (void *)__get_free_pages(flags, get_order(c->size));
+ b = (void *)slob_get_free_pages(flags, get_order(c->size));

if (c->ctor)
c->ctor(b, c, SLAB_CTOR_CONSTRUCTOR);
Index: linux-2.6-git/mm/Kconfig
===================================================================
--- linux-2.6-git.orig/mm/Kconfig
+++ linux-2.6-git/mm/Kconfig
@@ -165,7 +165,6 @@ config ZONE_DMA_FLAG

config SLAB_FAIR
def_bool n
- depends on SLAB || SLUB

config NR_QUICK
int

--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/