[RFC][PATCH 2/3] mm: slab: move around slab ->freelist for cmpxchg

From: Dave Hansen
Date: Wed Dec 11 2013 - 17:40:48 EST



The write argument to cmpxchg_double() must be 16-byte aligned.
We used to align 'struct page' itself in order to guarantee this,
but that wastes 8 bytes per page. Instead, we use the 8 bytes
immediately before page->counters and move ->freelist between
there and the existing 8 bytes after ->counters. That way, no
matter how 'struct page' itself is aligned, one of the two
pairings gives us a 16-byte-aligned area with which to do this
cmpxchg.
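
As a sanity check of the idea, here is a minimal userspace sketch
(not part of the patch; 'struct mock_slab_page' and
pick_freelist_slot() are stand-ins for 'struct slab_page' and
slab_freelist_ptr(), and a 64-bit build is assumed):

	#include <stdio.h>
	#include <stdint.h>

	struct mock_slab_page {
		unsigned long flags;
		void *_freelist_first;	/* the 8 bytes before ->counters */
		unsigned long counters;
		void *_freelist_second;	/* the 8 bytes after ->counters */
	};

	/* Pick whichever freelist slot forms a 16-byte-aligned
	 * pair with ->counters: */
	static void **pick_freelist_slot(struct mock_slab_page *p)
	{
		uintptr_t mask = 2 * sizeof(unsigned long) - 1;

		if (((uintptr_t)&p->counters & mask) == 0)
			return &p->_freelist_second; /* (counters, freelist) */
		return &p->_freelist_first;	     /* (freelist, counters) */
	}

	int main(void)
	{
		/* emulate the two possible alignments of the struct: */
		static unsigned char buf[sizeof(struct mock_slab_page) + 8]
			__attribute__((aligned(16)));
		struct mock_slab_page *a = (void *)buf;	      /* 16-byte-aligned */
		struct mock_slab_page *b = (void *)(buf + 8); /* 8-byte-aligned */

		printf("a uses the %s slot\n", pick_freelist_slot(a) ==
		       &a->_freelist_second ? "second" : "first");
		printf("b uses the %s slot\n", pick_freelist_slot(b) ==
		       &b->_freelist_second ? "second" : "first");
		return 0;
	}

Either way, the chosen slot and ->counters form one contiguous,
16-byte-aligned doubleword.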



---

linux.git-davehans/include/linux/mm_types.h | 17 +++++--
linux.git-davehans/mm/slab.c | 2
linux.git-davehans/mm/slab.h | 1
linux.git-davehans/mm/slob.c | 2
linux.git-davehans/mm/slub.c | 67 +++++++++++++++++++++++-----
5 files changed, 74 insertions(+), 15 deletions(-)
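
For reference, the layout invariant that the mm_types.h change below
relies on could be spelled with compile-time checks along these lines
(illustrative only, not part of the patch):

	#include <stddef.h>

	_Static_assert(offsetof(struct slab_page, counters) -
		       offsetof(struct slab_page, _freelist_first)
		       == sizeof(void *),
		       "_freelist_first must immediately precede ->counters");
	_Static_assert(offsetof(struct slab_page, _freelist_second) -
		       offsetof(struct slab_page, counters)
		       == sizeof(unsigned long),
		       "_freelist_second must immediately follow ->counters");

With the two candidate slots bracketing ->counters like this, one of
the (freelist, counters) pairings is always doubleword-aligned.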

diff -puN include/linux/mm_types.h~move-around-freelist-to-align include/linux/mm_types.h
--- linux.git/include/linux/mm_types.h~move-around-freelist-to-align 2013-12-11 13:19:54.334963497 -0800
+++ linux.git-davehans/include/linux/mm_types.h 2013-12-11 13:19:54.344963939 -0800
@@ -140,11 +140,20 @@ struct slab_page {
/* First double word block */
unsigned long flags; /* Atomic flags, some possibly
* updated asynchronously */
- void *s_mem; /* slab first object */
+ union {
+ void *s_mem; /* slab first object */
+ /*
+ * The combination of ->counters and ->freelist
+ * needs to be doubleword-aligned in order for
+ * slub's cmpxchg_double() to work properly.
+ * slub does not use 's_mem', so we reuse its slot
+ * here to get that alignment no matter how
+ * struct page is aligned.
+ */
+ void *_freelist_first; /* sl[aou]b first free object */
+ };

/* Second double word */
- void *_freelist; /* sl[aou]b first free object */
-
union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
@@ -173,6 +182,8 @@ struct slab_page {
unsigned int active; /* SLAB */
};

+ void *_freelist_second; /* sl[aou]b first free object */
+
/* Third double word block */
union {
struct { /* slub per cpu partial pages */
diff -puN mm/slab.c~move-around-freelist-to-align mm/slab.c
--- linux.git/mm/slab.c~move-around-freelist-to-align 2013-12-11 13:19:54.335963541 -0800
+++ linux.git-davehans/mm/slab.c 2013-12-11 13:19:54.345963983 -0800
@@ -1952,7 +1952,7 @@ static void slab_destroy_debugcheck(stru

static inline unsigned int **slab_freelist_ptr(struct slab_page *page)
{
- return (unsigned int **)&page->_freelist;
+ return (unsigned int **)&page->_freelist_first;
}

static inline unsigned int *slab_freelist(struct slab_page *page)
diff -puN mm/slab.h~move-around-freelist-to-align mm/slab.h
--- linux.git/mm/slab.h~move-around-freelist-to-align 2013-12-11 13:19:54.337963630 -0800
+++ linux.git-davehans/mm/slab.h 2013-12-11 13:19:54.346964027 -0800
@@ -278,3 +278,4 @@ struct kmem_cache_node {

void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
+
diff -puN mm/slob.c~move-around-freelist-to-align mm/slob.c
--- linux.git/mm/slob.c~move-around-freelist-to-align 2013-12-11 13:19:54.339963718 -0800
+++ linux.git-davehans/mm/slob.c 2013-12-11 13:19:54.346964027 -0800
@@ -213,7 +213,7 @@ static void slob_free_pages(void *b, int

static inline void **slab_freelist_ptr(struct slab_page *sp)
{
- return &sp->_freelist;
+ return &sp->_freelist_first;
}

static inline void *slab_freelist(struct slab_page *sp)
diff -puN mm/slub.c~move-around-freelist-to-align mm/slub.c
--- linux.git/mm/slub.c~move-around-freelist-to-align 2013-12-11 13:19:54.340963762 -0800
+++ linux.git-davehans/mm/slub.c 2013-12-11 13:19:54.348964116 -0800
@@ -228,9 +228,23 @@ static inline void stat(const struct kme
#endif
}

-static inline void **slab_freelist_ptr(struct slab_page *spage)
+static inline bool ptr_doubleword_aligned(void *ptr)
{
- return &spage->_freelist;
+ int doubleword_bytes = BITS_PER_LONG * 2 / 8;
+
+ return PTR_ALIGN(ptr, doubleword_bytes) == ptr;
+}
+
+void **slab_freelist_ptr(struct slab_page *spage)
+{
+ /*
+ * If ->counters is doubleword-aligned, use the
+ * freelist slot _after_ it.
+ */
+ if (ptr_doubleword_aligned(&spage->counters))
+ return &spage->_freelist_second;
+ return &spage->_freelist_first;
}

static inline void *slab_freelist(struct slab_page *spage)
@@ -380,6 +394,39 @@ static __always_inline void slab_unlock(
__bit_spin_unlock(PG_locked, &page->flags);
}

+/*
+ * Take two adjacent 8-byte-aligned, but non-doubleword-aligned,
+ * arguments and swap them around to guarantee that the first
+ * arg is doubleword-aligned.
+ *
+ * The write argument to cmpxchg_double() must be 16-byte
+ * aligned. We used to align 'struct page' itself in order
+ * to guarantee this, but that wastes 8 bytes per page.
+ * Instead, we use the 8 bytes immediately before
+ * page->counters and move ->freelist between there and the
+ * existing 8 bytes after ->counters. That way, no matter
+ * how 'struct page' itself is aligned, we can ensure a
+ * 16-byte-aligned area with which to do this cmpxchg.
+ */
+static inline bool __cmpxchg_double_slab_unaligned(struct slab_page *page,
+ void *freelist_old, unsigned long counters_old,
+ void *freelist_new, unsigned long counters_new)
+{
+ void **freelist = slab_freelist_ptr(page);
+ if (ptr_doubleword_aligned(&page->counters)) {
+ if (cmpxchg_double(&page->counters, freelist,
+ counters_old, freelist_old,
+ counters_new, freelist_new))
+ return 1;
+ } else {
+ if (cmpxchg_double(freelist, &page->counters,
+ freelist_old, counters_old,
+ freelist_new, counters_new))
+ return 1;
+ }
+ return 0;
+}
+
/* Interrupts must be disabled (for the fallback code to work right) */
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab_page *page,
void *freelist_old, unsigned long counters_old,
@@ -390,10 +437,10 @@ static inline bool __cmpxchg_double_slab
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(slab_freelist_ptr(page), &page->counters,
- freelist_old, counters_old,
- freelist_new, counters_new))
- return 1;
+ if (__cmpxchg_double_slab_unaligned(page,
+ freelist_old, counters_old,
+ freelist_new, counters_new))
+ return 1;
} else
#endif
{
@@ -426,10 +473,10 @@ static inline bool cmpxchg_double_slab(s
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(slab_freelist_ptr(page), &page->counters,
- freelist_old, counters_old,
- freelist_new, counters_new))
- return 1;
+ if (__cmpxchg_double_slab_unaligned(page,
+ freelist_old, counters_old,
+ freelist_new, counters_new))
+ return 1;
} else
#endif
{
_
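
For completeness: the 16-byte-alignment requirement comes from the
underlying instruction. On x86-64, cmpxchg_double() boils down to
cmpxchg16b, which raises #GP on a misaligned operand. A rough
userspace demonstration of the aligned case (assumes gcc -mcx16 on
x86-64; not part of the patch):

	#include <stdio.h>

	int main(void)
	{
		/* a 16-byte-aligned 16-byte location works... */
		static unsigned char buf[16] __attribute__((aligned(16)));
		__int128 *p = (__int128 *)buf;
		__int128 expected = 0, desired = 42;

		if (__atomic_compare_exchange_n(p, &expected, desired, 0,
						__ATOMIC_SEQ_CST,
						__ATOMIC_SEQ_CST))
			printf("aligned 16-byte cmpxchg succeeded\n");

		/*
		 * ...whereas executing cmpxchg16b on buf + 8 (only
		 * 8-byte-aligned) would fault, which is why the
		 * (freelist, counters) pair must start 16-byte-aligned.
		 */
		return 0;
	}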