[RFC 03/31] mars: add new module brick_mem

From: Thomas Schoebel-Theuer
Date: Thu Dec 31 2015 - 06:38:53 EST


Signed-off-by: Thomas Schoebel-Theuer <tst@xxxxxxxxxxxxxxxxxx>
---
drivers/staging/mars/brick_mem.c | 1081 ++++++++++++++++++++++++++++++++++++++
include/linux/brick/brick_mem.h | 218 ++++++++
2 files changed, 1299 insertions(+)
create mode 100644 drivers/staging/mars/brick_mem.c
create mode 100644 include/linux/brick/brick_mem.h

diff --git a/drivers/staging/mars/brick_mem.c b/drivers/staging/mars/brick_mem.c
new file mode 100644
index 0000000..03a3d28
--- /dev/null
+++ b/drivers/staging/mars/brick_mem.c
@@ -0,0 +1,1081 @@
+/*
+ * MARS Long Distance Replication Software
+ *
+ * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
+ * Copyright (C) 2011-2014 1&1 Internet AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+
+#include <linux/atomic.h>
+
+#include <linux/brick/brick_mem.h>
+#include <linux/brick/brick_say.h>
+#include <linux/brick/lamport.h>
+
+#define USE_KERNEL_PAGES /* currently mandatory (vmalloc does not work) */
+
+#define MAGIC_BLOCK ((int)0x8B395D7B)
+#define MAGIC_BEND ((int)0x8B395D7C)
+#define MAGIC_MEM1 ((int)0x8B395D7D)
+#define MAGIC_MEM2 ((int)0x9B395D8D)
+#define MAGIC_MEND1 ((int)0x8B395D7E)
+#define MAGIC_MEND2 ((int)0x9B395D8E)
+#define MAGIC_STR ((int)0x8B395D7F)
+#define MAGIC_SEND ((int)0x9B395D8F)
+
+#define INT_ACCESS(ptr, offset) (*(int *)(((char *)(ptr)) + (offset)))
+
+/*
+ * Expand to the argument list passed to say(): a fixed prefix format
+ * concatenated with the caller's _fmt, followed by the prefix arguments
+ * (two timestamps, message class name, task name, CPU, file, line, function).
+ * The expansion references _s_now and _l_now, which must be in scope at the
+ * point of use; _BRICK_MSG declares both before expanding this macro.
+ */
+#define _BRICK_FMT(_fmt, _class) \
+ "%ld.%09ld %ld.%09ld MEM_%-5s %s[%d] %s:%d %s(): " \
+ _fmt, \
+ _s_now.tv_sec, _s_now.tv_nsec, \
+ _l_now.tv_sec, _l_now.tv_nsec, \
+ say_class[_class], \
+ current->comm, (int)smp_processor_id(), \
+ __BASE_FILE__, \
+ __LINE__, \
+ __func__
+
+/*
+ * Emit one message of class _class through say().
+ * Takes the current time (CURRENT_TIME) and the lamport time (get_lamport())
+ * so that _BRICK_FMT can print both, then optionally dumps the stack when
+ * _dump is true.
+ */
+#define _BRICK_MSG(_class, _dump, _fmt, _args...) \
+ do { \
+ struct timespec _s_now = CURRENT_TIME; \
+ struct timespec _l_now; \
+ get_lamport(&_l_now); \
+ say(_class, _BRICK_FMT(_fmt, _class), ##_args); \
+ if (_dump) \
+ dump_stack(); \
+ } while (0)
+
+/* Convenience wrappers per message class; only BRICK_ERR also dumps the stack. */
+#define BRICK_ERR(_fmt, _args...) _BRICK_MSG(SAY_ERROR, true, _fmt, ##_args)
+#define BRICK_WRN(_fmt, _args...) _BRICK_MSG(SAY_WARN, false, _fmt, ##_args)
+#define BRICK_INF(_fmt, _args...) _BRICK_MSG(SAY_INFO, false, _fmt, ##_args)
+
+/***********************************************************************/
+
+/* limit handling */
+
+#include <linux/swap.h>
+
+long long brick_global_memavail;
+long long brick_global_memlimit;
+
+atomic64_t brick_global_block_used = ATOMIC64_INIT(0);
+
+/*
+ * Ask the kernel for the total number of RAM pages via si_meminfo(),
+ * convert that to KiB and publish it in brick_global_memavail.
+ */
+void get_total_ram(void)
+{
+	struct sysinfo meminfo = {};
+
+	si_meminfo(&meminfo);
+	/* si_swapinfo(&meminfo); */
+
+	/* totalram counts pages; PAGE_SIZE / 1024 is KiB per page */
+	brick_global_memavail = (long long)meminfo.totalram * (PAGE_SIZE / 1024);
+
+	BRICK_INF("total RAM = %lld [KiB]\n", brick_global_memavail);
+}
+
+/***********************************************************************/
+
+/* small memory allocation (use this only for len < PAGE_SIZE) */
+
+#ifdef BRICK_DEBUG_MEM
+static atomic_t phys_mem_alloc = ATOMIC_INIT(0);
+static atomic_t mem_redirect_alloc = ATOMIC_INIT(0);
+static atomic_t mem_count[BRICK_DEBUG_MEM];
+static atomic_t mem_free[BRICK_DEBUG_MEM];
+static int mem_len[BRICK_DEBUG_MEM];
+
+#define PLUS_SIZE (6 * sizeof(int))
+#else
+#define PLUS_SIZE (2 * sizeof(int))
+#endif
+
+/*
+ * Raw allocator behind _brick_mem_alloc().
+ *
+ * Requests of PAGE_SIZE or more are redirected to the block allocator;
+ * smaller ones use kmalloc(GFP_BRICK) and are retried once per second
+ * until they succeed, so the kmalloc path never returns NULL.
+ */
+static inline
+void *__brick_mem_alloc(int len)
+{
+	void *mem;
+
+	if (len >= PAGE_SIZE) {
+#ifdef BRICK_DEBUG_MEM
+		atomic_inc(&mem_redirect_alloc);
+#endif
+		return _brick_block_alloc(0, len, 0);
+	}
+
+	mem = kmalloc(len, GFP_BRICK);
+	while (unlikely(!mem)) {
+		/* out of memory: wait a second and try again */
+		msleep(1000);
+		mem = kmalloc(len, GFP_BRICK);
+	}
+#ifdef BRICK_DEBUG_MEM
+	atomic_inc(&phys_mem_alloc);
+#endif
+	return mem;
+}
+
+/*
+ * Counterpart of __brick_mem_alloc(): len selects the allocator that
+ * originally served the request (kmalloc below PAGE_SIZE, the block
+ * allocator otherwise), so it must be the same raw length.
+ */
+static inline
+void __brick_mem_free(void *data, int len)
+{
+	if (len < PAGE_SIZE) {
+		kfree(data);
+#ifdef BRICK_DEBUG_MEM
+		atomic_dec(&phys_mem_alloc);
+#endif
+		return;
+	}
+
+	_brick_block_free(data, len, 0);
+#ifdef BRICK_DEBUG_MEM
+	atomic_dec(&mem_redirect_alloc);
+#endif
+}
+
+/*
+ * Allocate len bytes of small memory on behalf of source line "line".
+ *
+ * The raw allocation is PLUS_SIZE bytes larger than requested.  Without
+ * BRICK_DEBUG_MEM the extra space only stores len in front of the payload
+ * (read back by _brick_mem_free()).  With BRICK_DEBUG_MEM a four-int header
+ * (magic, len, line, magic) precedes the payload and two magic ints follow
+ * it, and per-line statistics are updated.
+ *
+ * Returns a pointer to the payload, i.e. behind the header.
+ */
+void *_brick_mem_alloc(int len, int line)
+{
+	void *raw;
+
+#ifdef CONFIG_MARS_DEBUG
+	might_sleep();
+#endif
+
+	raw = __brick_mem_alloc(len + PLUS_SIZE);
+
+#ifdef BRICK_DEBUG_MEM
+	/* clamp the line number into the range of the statistics arrays */
+	if (unlikely(line < 0))
+		line = 0;
+	else if (unlikely(line >= BRICK_DEBUG_MEM))
+		line = BRICK_DEBUG_MEM - 1;
+
+	/* header in front of the payload */
+	INT_ACCESS(raw, 0 * sizeof(int)) = MAGIC_MEM1;
+	INT_ACCESS(raw, 1 * sizeof(int)) = len;
+	INT_ACCESS(raw, 2 * sizeof(int)) = line;
+	INT_ACCESS(raw, 3 * sizeof(int)) = MAGIC_MEM2;
+	raw += 4 * sizeof(int);
+
+	/* trailer directly behind the payload */
+	INT_ACCESS(raw, len + 0 * sizeof(int)) = MAGIC_MEND1;
+	INT_ACCESS(raw, len + 1 * sizeof(int)) = MAGIC_MEND2;
+
+	atomic_inc(&mem_count[line]);
+	mem_len[line] = len;
+#else
+	INT_ACCESS(raw, 0 * sizeof(int)) = len;
+	raw += PLUS_SIZE;
+#endif
+	return raw;
+}
+
+/*
+ * Free memory obtained from _brick_mem_alloc().
+ *
+ * data is the payload pointer returned by _brick_mem_alloc(); cline is the
+ * source line of the caller and is only used in diagnostics.
+ *
+ * The length is read back from the header in front of the payload.  With
+ * BRICK_DEBUG_MEM the header and trailer magics and the recorded line are
+ * validated first; on any mismatch an error is logged and the memory is
+ * NOT freed (the function jumps to _out_return).
+ */
+void _brick_mem_free(void *data, int cline)
+{
+#ifdef BRICK_DEBUG_MEM
+ /* the debug header occupies four ints in front of the payload */
+ void *test = data - 4 * sizeof(int);
+ int magic1 = INT_ACCESS(test, 0 * sizeof(int));
+ int len = INT_ACCESS(test, 1 * sizeof(int));
+ int line = INT_ACCESS(test, 2 * sizeof(int));
+ int magic2 = INT_ACCESS(test, 3 * sizeof(int));
+
+ if (unlikely(magic1 != MAGIC_MEM1)) {
+ BRICK_ERR("line %d memory corruption: magix1 %08x != %08x, len = %d\n",
+ cline,
+ magic1,
+ MAGIC_MEM1,
+ len);
+ goto _out_return;
+ }
+ if (unlikely(magic2 != MAGIC_MEM2)) {
+ BRICK_ERR("line %d memory corruption: magix2 %08x != %08x, len = %d\n",
+ cline,
+ magic2,
+ MAGIC_MEM2,
+ len);
+ goto _out_return;
+ }
+ if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+ BRICK_ERR("line %d memory corruption: alloc line = %d, len = %d\n", cline, line, len);
+ goto _out_return;
+ }
+ /* invalidate the start magic so that a second free is reported */
+ INT_ACCESS(test, 0) = 0xffffffff;
+ /* magic1 / magic2 are reused for the two trailer magics behind the payload */
+ magic1 = INT_ACCESS(data, len + 0 * sizeof(int));
+ if (unlikely(magic1 != MAGIC_MEND1)) {
+ BRICK_ERR("line %d memory corruption: magix1 %08x != %08x, len = %d\n",
+ cline,
+ magic1,
+ MAGIC_MEND1,
+ len);
+ goto _out_return;
+ }
+ magic2 = INT_ACCESS(data, len + 1 * sizeof(int));
+ if (unlikely(magic2 != MAGIC_MEND2)) {
+ BRICK_ERR("line %d memory corruption: magix2 %08x != %08x, len = %d\n",
+ cline,
+ magic2,
+ MAGIC_MEND2,
+ len);
+ goto _out_return;
+ }
+ /* invalidate the first trailer magic as well */
+ INT_ACCESS(data, len) = 0xffffffff;
+ atomic_dec(&mem_count[line]);
+ atomic_inc(&mem_free[line]);
+#else
+ /* non-debug layout: only the length is stored in front of the payload */
+ void *test = data - PLUS_SIZE;
+ int len = INT_ACCESS(test, 0 * sizeof(int));
+
+#endif
+ /* hand the raw allocation (header included) back to the raw allocator */
+ data = test;
+ __brick_mem_free(data, len + PLUS_SIZE);
+#ifdef BRICK_DEBUG_MEM
+_out_return:;
+#endif
+}
+
+/***********************************************************************/
+
+/* string memory allocation */
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+# define STRING_CANARY \
+ "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
+ "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" \
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+ "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
+ "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" \
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+ "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
+ "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" \
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+ " FILE = " __FILE__ \
+ " VERSION = " __VERSION__ \
+ " xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx STRING_error xxx\n"
+# define STRING_PLUS (sizeof(int) * 3 + sizeof(STRING_CANARY))
+#elif defined(BRICK_DEBUG_MEM)
+# define STRING_PLUS (sizeof(int) * 4)
+#else
+# define STRING_PLUS 0
+#endif
+
+#ifdef BRICK_DEBUG_MEM
+static atomic_t phys_string_alloc = ATOMIC_INIT(0);
+static atomic_t string_count[BRICK_DEBUG_MEM];
+static atomic_t string_free[BRICK_DEBUG_MEM];
+
+#endif
+
+/*
+ * Allocate a zero-initialised string buffer of len bytes on behalf of
+ * source line "line".  len <= 0 selects the default BRICK_STRING_LEN.
+ *
+ * The allocation is retried once per second until it succeeds, so the
+ * function never returns NULL.
+ *
+ * With BRICK_DEBUG_MEM a three-int header (magic, len, line) precedes the
+ * returned buffer and an end marker follows it: MAGIC_SEND, or the long
+ * STRING_CANARY when CONFIG_MARS_DEBUG_MEM_STRONG is set.  In the strong
+ * variant everything behind the first byte of the returned buffer is
+ * poisoned with '?', so that code relying on zeroed memory behind the
+ * terminator is detected, while the buffer still starts out as an empty
+ * string.
+ */
+char *_brick_string_alloc(int len, int line)
+{
+	char *res;
+
+#ifdef CONFIG_MARS_DEBUG
+	might_sleep();
+	if (unlikely(len > PAGE_SIZE))
+		BRICK_WRN("line = %d string too long: len = %d\n", line, len);
+#endif
+	if (len <= 0)
+		len = BRICK_STRING_LEN;
+
+	for (;;) {
+		res = kzalloc(len + STRING_PLUS, GFP_BRICK);
+		if (likely(res))
+			break;
+		msleep(1000);
+	}
+
+#ifdef BRICK_DEBUG_MEM
+	atomic_inc(&phys_string_alloc);
+	/* clamp the line number into the range of the statistics arrays */
+	if (unlikely(line < 0))
+		line = 0;
+	else if (unlikely(line >= BRICK_DEBUG_MEM))
+		line = BRICK_DEBUG_MEM - 1;
+	INT_ACCESS(res, 0) = MAGIC_STR;
+	INT_ACCESS(res, sizeof(int)) = len;
+	INT_ACCESS(res, sizeof(int) * 2) = line;
+	res += sizeof(int) * 3;
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	/*
+	 * Poison the payload only after skipping the header: doing it on
+	 * the raw allocation would make the returned string start with '?'
+	 * instead of NUL and leave its last bytes unpoisoned.
+	 */
+	memset(res + 1, '?', len - 1);
+	strcpy(res + len, STRING_CANARY);
+#else
+	INT_ACCESS(res, len) = MAGIC_SEND;
+#endif
+	atomic_inc(&string_count[line]);
+#endif
+	return res;
+}
+
+/*
+ * Free a string obtained from _brick_string_alloc().
+ *
+ * data is the pointer returned by _brick_string_alloc(); cline is the
+ * source line of the caller and is only used in diagnostics.
+ *
+ * With BRICK_DEBUG_MEM the header in front of the string and the end marker
+ * behind it are validated first.  On a detected corruption an error is
+ * logged and the memory is NOT freed (jump to _out_return).  An over-long
+ * string (len > PAGE_SIZE) is only reported; freeing continues.
+ */
+void _brick_string_free(const char *data, int cline)
+{
+#ifdef BRICK_DEBUG_MEM
+ int magic;
+ int len;
+ int line;
+ /* keep the user-visible pointer; data is moved back to the raw allocation */
+ char *orig = (void *)data;
+
+ data -= sizeof(int) * 3;
+ magic = INT_ACCESS(data, 0);
+ if (unlikely(magic != MAGIC_STR)) {
+ BRICK_ERR("cline %d stringmem corruption: magix %08x != %08x\n", cline, magic, MAGIC_STR);
+ goto _out_return;
+ }
+ len = INT_ACCESS(data, sizeof(int));
+ line = INT_ACCESS(data, sizeof(int) * 2);
+ if (unlikely(len <= 0)) {
+ BRICK_ERR("cline %d stringmem corruption: line = %d len = %d\n", cline, line, len);
+ goto _out_return;
+ }
+ if (unlikely(len > PAGE_SIZE))
+ BRICK_ERR("cline %d string too long: line = %d len = %d string='%s'\n", cline, line, len, orig);
+ if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+ BRICK_ERR("cline %d stringmem corruption: line = %d (len = %d)\n", cline, line, len);
+ goto _out_return;
+ }
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+ /* NOTE(review): the message prints the expected canary, not the bytes found */
+ if (unlikely(strcmp(orig + len, STRING_CANARY))) {
+ BRICK_ERR("cline %d stringmem corruption: bad canary '%s', line = %d len = %d\n",
+ cline, STRING_CANARY, line, len);
+ goto _out_return;
+ }
+ /* damage the canary and overwrite the payload so that reuse after free shows up */
+ orig[len]--;
+ memset(orig, '!', len);
+#else
+ magic = INT_ACCESS(orig, len);
+ if (unlikely(magic != MAGIC_SEND)) {
+ BRICK_ERR("cline %d stringmem corruption: end_magix %08x != %08x, line = %d len = %d\n",
+ cline, magic, MAGIC_SEND, line, len);
+ goto _out_return;
+ }
+ /* invalidate the end magic so that a second free is reported */
+ INT_ACCESS(orig, len) = 0xffffffff;
+#endif
+ atomic_dec(&string_count[line]);
+ atomic_inc(&string_free[line]);
+ atomic_dec(&phys_string_alloc);
+#endif
+ kfree(data);
+#ifdef BRICK_DEBUG_MEM
+_out_return:;
+#endif
+}
+
+/***********************************************************************/
+
+/* block memory allocation */
+
+/*
+ * Translate a byte length into the smallest page order whose size
+ * (PAGE_SIZE << order) is at least len.
+ *
+ * A non-positive len is reported and mapped to order 0.  A len that would
+ * need more than BRICK_MAX_ORDER is reported and clamped to BRICK_MAX_ORDER,
+ * i.e. the resulting block is then smaller than len.
+ */
+static
+int len2order(int len)
+{
+	int order;
+
+	if (unlikely(len <= 0)) {
+		BRICK_ERR("trying to use %d bytes\n", len);
+		return 0;
+	}
+
+	for (order = 0; (PAGE_SIZE << order) < len; order++)
+		; /* just count */
+
+	if (likely(order <= BRICK_MAX_ORDER))
+		return order;
+
+	BRICK_ERR("trying to use %d bytes (oder = %d, max = %d)\n", len, order, BRICK_MAX_ORDER);
+	return BRICK_MAX_ORDER;
+}
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+static atomic_t _alloc_count[BRICK_MAX_ORDER+1];
+int brick_mem_alloc_count[BRICK_MAX_ORDER+1] = {};
+int brick_mem_alloc_max[BRICK_MAX_ORDER+1] = {};
+int brick_mem_freelist_max[BRICK_MAX_ORDER+1] = {};
+
+#endif
+
+#ifdef BRICK_DEBUG_MEM
+static atomic_t phys_block_alloc = ATOMIC_INIT(0);
+
+/* indexed by line */
+static atomic_t block_count[BRICK_DEBUG_MEM];
+static atomic_t block_free[BRICK_DEBUG_MEM];
+static int block_len[BRICK_DEBUG_MEM];
+
+/* indexed by order */
+static atomic_t op_count[BRICK_MAX_ORDER+1];
+static atomic_t raw_count[BRICK_MAX_ORDER+1];
+static int alloc_line[BRICK_MAX_ORDER+1];
+static int alloc_len[BRICK_MAX_ORDER+1];
+
+#endif
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+
+#define MAX_INFO_LISTS 1024
+
+#define INFO_LIST_HASH(addr) ((unsigned long)(addr) / (PAGE_SIZE * 2) % MAX_INFO_LISTS)
+
+/*
+ * Bookkeeping record for one physically allocated block
+ * (CONFIG_MARS_DEBUG_MEM_STRONG only).  Records are kept in the hash lists
+ * inf_anchor[], selected by INFO_LIST_HASH() of the block address.
+ */
+struct mem_block_info {
+ struct list_head inf_head; /* link in inf_anchor[hash] */
+ void *inf_data; /* block address; lookup key in _find_block_info() */
+ int inf_len; /* block size in bytes (PAGE_SIZE << order) */
+ int inf_line; /* source line of the last alloc / free operation */
+ bool inf_used; /* true while handed out, false after _brick_block_free() */
+};
+
+static struct list_head inf_anchor[MAX_INFO_LISTS];
+static rwlock_t inf_lock[MAX_INFO_LISTS];
+
+/*
+ * Create the bookkeeping record for a freshly allocated block and insert
+ * it into the hash list selected by the block address.  The record is
+ * marked as used.  The kmalloc() is retried once per second until it
+ * succeeds.
+ */
+static
+void _new_block_info(void *data, int len, int cline)
+{
+	struct mem_block_info *info;
+	int slot = INFO_LIST_HASH(data);
+
+	info = kmalloc(sizeof(*info), GFP_BRICK);
+	while (unlikely(!info)) {
+		msleep(1000);
+		info = kmalloc(sizeof(*info), GFP_BRICK);
+	}
+
+	info->inf_data = data;
+	info->inf_len = len;
+	info->inf_line = cline;
+	info->inf_used = true;
+
+	write_lock(&inf_lock[slot]);
+	list_add(&info->inf_head, &inf_anchor[slot]);
+	write_unlock(&inf_lock[slot]);
+}
+
+/*
+ * Look up the bookkeeping record of the block starting at data.
+ *
+ * With remove == true the record is also unlinked from its hash list
+ * (under the write lock); otherwise the list is only read-locked.
+ * Returns the record, or NULL when the address is unknown.  The lock is
+ * dropped before returning, so the caller works on the record unlocked.
+ */
+static
+struct mem_block_info *_find_block_info(void *data, bool remove)
+{
+	struct mem_block_info *found = NULL;
+	struct list_head *pos;
+	int slot = INFO_LIST_HASH(data);
+
+	if (remove)
+		write_lock(&inf_lock[slot]);
+	else
+		read_lock(&inf_lock[slot]);
+
+	list_for_each(pos, &inf_anchor[slot]) {
+		struct mem_block_info *cand = list_entry(pos, struct mem_block_info, inf_head);
+
+		if (cand->inf_data == data) {
+			if (remove)
+				list_del_init(pos);
+			found = cand;
+			break;
+		}
+	}
+
+	if (remove)
+		write_unlock(&inf_lock[slot]);
+	else
+		read_unlock(&inf_lock[slot]);
+	return found;
+}
+
+#endif /* CONFIG_MARS_DEBUG_MEM_STRONG */
+
+/*
+ * Physically allocate a block of PAGE_SIZE << order bytes.
+ *
+ * The allocation is retried once per second until it succeeds, so the
+ * function never returns NULL.  The global usage counter (in KiB) and,
+ * depending on the debug options, the bookkeeping record and the raw
+ * statistics are updated.
+ */
+static inline
+void *__brick_block_alloc(gfp_t gfp, int order, int cline)
+{
+	void *block;
+
+	while (1) {
+#ifdef USE_KERNEL_PAGES
+		block = (void *)__get_free_pages(gfp, order);
+#else
+		block = __vmalloc(PAGE_SIZE << order, gfp, PAGE_KERNEL_IO);
+#endif
+		if (unlikely(!block)) {
+			/* out of memory: wait a second and try again */
+			msleep(1000);
+			continue;
+		}
+		break;
+	}
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	_new_block_info(block, PAGE_SIZE << order, cline);
+#endif
+#ifdef BRICK_DEBUG_MEM
+	atomic_inc(&phys_block_alloc);
+	atomic_inc(&raw_count[order]);
+#endif
+	/* account the block in KiB */
+	atomic64_add((PAGE_SIZE/1024) << order, &brick_global_block_used);
+
+	return block;
+}
+
+/*
+ * Physically release a block of PAGE_SIZE << order bytes starting at data.
+ * cline is the caller's source line and is only used in diagnostics.
+ *
+ * With CONFIG_MARS_DEBUG_MEM_STRONG the bookkeeping record is removed and
+ * checked first; when the address is unknown or the size does not match,
+ * an error is logged and the pages are NOT released.  Note that the
+ * statistics and the global usage counter behind the err label are updated
+ * in that case as well.
+ */
+static inline
+void __brick_block_free(void *data, int order, int cline)
+{
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+ struct mem_block_info *inf = _find_block_info(data, true);
+
+ if (likely(inf)) {
+ /* copy what is needed for the message, the record is freed right away */
+ int inf_len = inf->inf_len;
+ int inf_line = inf->inf_line;
+
+ kfree(inf);
+ if (unlikely(inf_len != (PAGE_SIZE << order))) {
+ BRICK_ERR("line %d: address %p: bad freeing size %d (correct should be %d, previous line = %d)\n",
+ cline,
+ data,
+ (int)(PAGE_SIZE << order),
+ inf_len,
+ inf_line);
+ goto err;
+ }
+ } else {
+ BRICK_ERR("line %d: trying to free non-existent address %p (order = %d)\n", cline, data, order);
+ goto err;
+ }
+#endif
+#ifdef USE_KERNEL_PAGES
+ __free_pages(virt_to_page((unsigned long)data), order);
+#else
+ vfree(data);
+#endif
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+err:
+#endif
+#ifdef BRICK_DEBUG_MEM
+ atomic_dec(&phys_block_alloc);
+ atomic_dec(&raw_count[order]);
+#endif
+ /* the usage counter is kept in KiB */
+ atomic64_sub((PAGE_SIZE/1024) << order, &brick_global_block_used);
+}
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+int brick_allow_freelist = 1;
+
+int brick_pre_reserve[BRICK_MAX_ORDER+1] = {};
+
+/* Note: we have no separate lists per CPU.
+ * This should not hurt because the freelists are only used
+ * for higher-order pages which should be rather low-frequency.
+ */
+static spinlock_t freelist_lock[BRICK_MAX_ORDER+1];
+static void *brick_freelist[BRICK_MAX_ORDER+1];
+static atomic_t freelist_count[BRICK_MAX_ORDER+1];
+
+/*
+ * Take one block from the freelist of the given order.
+ *
+ * Returns the block, or NULL when the freelist is empty.  cline is the
+ * caller's source line and is only used in diagnostics and bookkeeping.
+ *
+ * With BRICK_DEBUG_MEM the head entry is checked against the fill pattern
+ * and the redundant next-pointer copy written by _put_free().  On a
+ * mismatch the whole freelist of that order is dropped (a deliberate
+ * memory leak instead of following corrupted pointers), its counter is
+ * reset to match the now empty list, and NULL is returned.
+ *
+ * With CONFIG_MARS_DEBUG_MEM_STRONG the bookkeeping record of the returned
+ * block is re-marked as used by cline.
+ */
+static
+void *_get_free(int order, int cline)
+{
+	void *data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&freelist_lock[order], flags);
+	data = brick_freelist[order];
+	if (likely(data)) {
+		void *next = *(void **)data;
+
+#ifdef BRICK_DEBUG_MEM /* check for corruptions */
+		/*
+		 * Compare as unsigned long, truncated to the native word size.
+		 * A signed long compared against the 64-bit constant could
+		 * never match on 32-bit machines.
+		 */
+		unsigned long pattern = *(((unsigned long *)data)+1);
+		void *copy = *(((void **)data)+2);
+
+		if (unlikely(pattern != (unsigned long)0xf0f0f0f0f0f0f0f0ULL || next != copy)) { /* found a corruption */
+			int murdered = atomic_read(&freelist_count[order]);
+
+			/* prevent further trouble by leaving a memleak */
+			brick_freelist[order] = NULL;
+			/* the list is empty now, keep the counter in sync */
+			atomic_set(&freelist_count[order], 0);
+			spin_unlock_irqrestore(&freelist_lock[order], flags);
+			BRICK_ERR("line %d:freelist corruption at %p (pattern = %lx next %p != %p, murdered = %d), order = %d\n",
+				  cline, data, pattern, next, copy, murdered, order);
+			return NULL;
+		}
+#endif
+		brick_freelist[order] = next;
+		atomic_dec(&freelist_count[order]);
+	}
+	spin_unlock_irqrestore(&freelist_lock[order], flags);
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	if (data) {
+		struct mem_block_info *inf = _find_block_info(data, false);
+
+		if (likely(inf)) {
+			if (unlikely(inf->inf_len != (PAGE_SIZE << order))) {
+				BRICK_ERR("line %d: address %p: bad freelist size %d (correct should be %d, previous line = %d)\n",
+					  cline, data, (int)(PAGE_SIZE << order), inf->inf_len, inf->inf_line);
+			}
+			inf->inf_line = cline;
+			inf->inf_used = true;
+		} else {
+			BRICK_ERR("line %d: freelist address %p is invalid (order = %d)\n", cline, data, order);
+		}
+	}
+#endif
+	return data;
+}
+
+/*
+ * Push a block onto the freelist of the given order.
+ *
+ * The link to the next entry is stored in the first pointer-sized word of
+ * the block itself.  With BRICK_DEBUG_MEM the block is first filled with
+ * the byte 0xf0 and a redundant copy of the link is stored in the third
+ * pointer-sized word; _get_free() verifies both.
+ */
+static
+void _put_free(void *data, int order)
+{
+	void **words = data;
+	unsigned long flags;
+
+#ifdef BRICK_DEBUG_MEM /* fill with pattern */
+	memset(data, 0xf0, PAGE_SIZE << order);
+#endif
+
+	spin_lock_irqsave(&freelist_lock[order], flags);
+	words[0] = brick_freelist[order];
+#ifdef BRICK_DEBUG_MEM /* insert redundant copy for checking */
+	words[2] = words[0];
+#endif
+	brick_freelist[order] = data;
+	spin_unlock_irqrestore(&freelist_lock[order], flags);
+
+	/* the counter is updated outside of the lock */
+	atomic_inc(&freelist_count[order]);
+}
+
+/*
+ * Drain the freelists of all orders, highest order first, and physically
+ * release every block found there.
+ */
+static
+void _free_all(void)
+{
+	int order = BRICK_MAX_ORDER;
+
+	while (order >= 0) {
+		void *block = _get_free(order, __LINE__);
+
+		if (block) {
+			__brick_block_free(block, order, __LINE__);
+			continue;
+		}
+		/* this freelist is empty, go on with the next lower order */
+		order--;
+	}
+}
+
+/*
+ * Adjust the freelists to the preallocation wishes in brick_pre_reserve[].
+ *
+ * For each order (highest first) the freelist limit is raised by
+ * brick_pre_reserve[order].  Then the freelist is filled up to that limit
+ * with freshly allocated blocks, or shrunk down to it when it currently
+ * holds more.
+ *
+ * Returns 0, or -ENOMEM when an allocation yielded NULL.
+ */
+int brick_mem_reserve(void)
+{
+	int order;
+	int status = 0;
+
+	for (order = BRICK_MAX_ORDER; order >= 0; order--) {
+		int wanted = brick_pre_reserve[order];
+		int delta;
+		int i;
+
+		brick_mem_freelist_max[order] += wanted;
+		BRICK_INF("preallocating %d at order %d (new maxlevel = %d)\n",
+			  wanted,
+			  order,
+			  brick_mem_freelist_max[order]);
+
+		/* positive: blocks are missing; negative: surplus blocks */
+		delta = brick_mem_freelist_max[order] - atomic_read(&freelist_count[order]);
+
+		/* fill up (does nothing unless delta > 0) */
+		for (i = 0; i < delta; i++) {
+			void *block = __brick_block_alloc(GFP_KERNEL, order, __LINE__);
+
+			if (unlikely(!block)) {
+				status = -ENOMEM;
+				continue;
+			}
+			_put_free(block, order);
+		}
+
+		/* shrink (does nothing unless delta < 0) */
+		for (i = 0; i < -delta; i++) {
+			void *block = _get_free(order, __LINE__);
+
+			if (likely(block))
+				__brick_block_free(block, order, __LINE__);
+		}
+	}
+	return status;
+}
+#else
+/*
+ * Stub used when CONFIG_MARS_MEM_PREALLOC is not defined: nothing is
+ * preallocated.  Takes no arguments, like the real implementation, so that
+ * callers build against the same signature in both configurations.
+ * Always returns 0.
+ */
+int brick_mem_reserve(void)
+{
+	BRICK_INF("preallocation is not compiled in\n");
+	return 0;
+}
+#endif
+
+/*
+ * Allocate a block of at least len bytes on behalf of source line "line".
+ *
+ * pos is not used by this implementation.
+ *
+ * The request is rounded up to a page order via len2order().  With
+ * CONFIG_MARS_MEM_PREALLOC the block is taken from the freelist of that
+ * order when possible, otherwise it is allocated physically.
+ *
+ * With BRICK_DEBUG_MEM extra space ("plus") is requested for guard data:
+ * for order > 1 the first page holds a header (magic, line, len) and the
+ * returned pointer is advanced by PAGE_SIZE, with magics directly in front
+ * of and behind the payload; for order == 1 the header is stored at offset
+ * PAGE_SIZE and the pointer is returned unchanged.
+ */
+void *_brick_block_alloc(loff_t pos, int len, int line)
+{
+ void *data;
+ int count;
+
+#ifdef BRICK_DEBUG_MEM
+#ifdef BRICK_DEBUG_ORDER0
+ const int plus0 = PAGE_SIZE;
+
+#else
+ const int plus0 = 0;
+
+#endif
+ /* extra bytes for guard data; must match the computation in _brick_block_free() */
+ const int plus = len <= PAGE_SIZE ? plus0 : PAGE_SIZE * 2;
+
+#else
+ const int plus = 0;
+
+#endif
+ int order = len2order(len + plus);
+
+ /* NOTE(review): len2order() as defined in this file never returns a negative value */
+ if (unlikely(order < 0)) {
+ BRICK_ERR("trying to allocate %d bytes (max = %d)\n", len, (int)(PAGE_SIZE << order));
+ return NULL;
+ }
+
+#ifdef CONFIG_MARS_DEBUG
+ might_sleep();
+#endif
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+ /* track the current and the maximum number of outstanding blocks per order */
+ count = atomic_add_return(1, &_alloc_count[order]);
+ brick_mem_alloc_count[order] = count;
+ if (count > brick_mem_alloc_max[order])
+ brick_mem_alloc_max[order] = count;
+#endif
+
+#ifdef BRICK_DEBUG_MEM
+ atomic_inc(&op_count[order]);
+ /* statistics */
+ alloc_line[order] = line;
+ alloc_len[order] = len;
+#endif
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+ /* Dynamic increase of limits, in order to reduce
+ * fragmentation on higher-order pages.
+ * This comes at the cost of higher memory usage.
+ */
+ if (order > 0 && count > brick_mem_freelist_max[order])
+ brick_mem_freelist_max[order] = count;
+#endif
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+ /* prefer the freelist; fall back to a physical allocation when it is empty */
+ data = _get_free(order, line);
+ if (!data)
+#endif
+ data = __brick_block_alloc(GFP_BRICK, order, line);
+
+#ifdef BRICK_DEBUG_MEM
+ if (order > 0) {
+ /* clamp the line number into the range of the statistics arrays */
+ if (unlikely(line < 0))
+ line = 0;
+ else if (unlikely(line >= BRICK_DEBUG_MEM))
+ line = BRICK_DEBUG_MEM - 1;
+ atomic_inc(&block_count[line]);
+ block_len[line] = len;
+ if (order > 1) {
+ /* header in the first page, payload starts at the second page */
+ INT_ACCESS(data, 0 * sizeof(int)) = MAGIC_BLOCK;
+ INT_ACCESS(data, 1 * sizeof(int)) = line;
+ INT_ACCESS(data, 2 * sizeof(int)) = len;
+ data += PAGE_SIZE;
+ /* guard magics directly in front of and behind the payload */
+ INT_ACCESS(data, -1 * sizeof(int)) = MAGIC_BLOCK;
+ INT_ACCESS(data, len) = MAGIC_BEND;
+ } else if (order == 1) {
+ /* header lives in the second page, payload pointer stays unchanged */
+ INT_ACCESS(data, PAGE_SIZE + 0 * sizeof(int)) = MAGIC_BLOCK;
+ INT_ACCESS(data, PAGE_SIZE + 1 * sizeof(int)) = line;
+ INT_ACCESS(data, PAGE_SIZE + 2 * sizeof(int)) = len;
+ }
+ }
+#endif
+ return data;
+}
+
+/*
+ * Release a block obtained from _brick_block_alloc().
+ *
+ * data is the pointer returned by _brick_block_alloc(), len the length
+ * passed to it, and cline the caller's source line (diagnostics only).
+ *
+ * The page order is recomputed from len exactly as in _brick_block_alloc().
+ * With the debug options enabled, the bookkeeping record
+ * (CONFIG_MARS_DEBUG_MEM_STRONG) and the guard data written at allocation
+ * time (BRICK_DEBUG_MEM) are validated first; on any mismatch an error is
+ * logged and the block is NOT released (jump to _out_return).
+ *
+ * With CONFIG_MARS_MEM_PREALLOC, blocks of order > 0 are put back onto the
+ * freelist while it is allowed and not over its limit; otherwise the block
+ * is released physically.
+ */
+void _brick_block_free(void *data, int len, int cline)
+{
+	int order;
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	struct mem_block_info *inf;
+	char *real_data;
+
+#endif
+#ifdef BRICK_DEBUG_MEM
+	int prev_line = 0;
+
+#ifdef BRICK_DEBUG_ORDER0
+	const int plus0 = PAGE_SIZE;
+
+#else
+	const int plus0 = 0;
+
+#endif
+	/* extra bytes for guard data; must match the computation in _brick_block_alloc() */
+	const int plus = len <= PAGE_SIZE ? plus0 : PAGE_SIZE * 2;
+
+#else
+	const int plus = 0;
+
+#endif
+
+	order = len2order(len + plus);
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	/* for order > 1 the payload starts one page behind the real block */
+	real_data = data;
+	if (order > 1)
+		real_data -= PAGE_SIZE;
+	inf = _find_block_info(real_data, false);
+	if (likely(inf)) {
+		prev_line = inf->inf_line;
+		if (unlikely(inf->inf_len != (PAGE_SIZE << order))) {
+			BRICK_ERR("line %d: address %p: bad freeing size %d (correct should be %d, previous line = %d)\n",
+				  cline, data, (int)(PAGE_SIZE << order), inf->inf_len, prev_line);
+			goto _out_return;
+		}
+		if (unlikely(!inf->inf_used)) {
+			BRICK_ERR("line %d: address %p: double freeing (previous line = %d)\n",
+				  cline,
+				  data,
+				  prev_line);
+			goto _out_return;
+		}
+		inf->inf_line = cline;
+		inf->inf_used = false;
+	} else {
+		BRICK_ERR("line %d: trying to free non-existent address %p (order = %d)\n", cline, data, order);
+		goto _out_return;
+	}
+#endif
+#ifdef BRICK_DEBUG_MEM
+	if (order > 1) {
+		/* header in the page in front of the payload */
+		void *test = data - PAGE_SIZE;
+		int magic = INT_ACCESS(test, 0);
+		int line = INT_ACCESS(test, sizeof(int));
+		int oldlen = INT_ACCESS(test, sizeof(int)*2);
+		int magic1 = INT_ACCESS(data, -1 * sizeof(int));
+		int magic2;
+
+		if (unlikely(magic1 != MAGIC_BLOCK)) {
+			BRICK_ERR("line %d memory corruption: %p magix1 %08x != %08x (previous line = %d)\n",
+				  cline,
+				  data,
+				  magic1,
+				  MAGIC_BLOCK,
+				  prev_line);
+			goto _out_return;
+		}
+		if (unlikely(magic != MAGIC_BLOCK)) {
+			BRICK_ERR("line %d memory corruption: %p magix %08x != %08x (previous line = %d)\n",
+				  cline,
+				  data,
+				  magic,
+				  MAGIC_BLOCK,
+				  prev_line);
+			goto _out_return;
+		}
+		if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+			BRICK_ERR("line %d memory corruption %p: alloc line = %d (previous line = %d)\n",
+				  cline,
+				  data,
+				  line,
+				  prev_line);
+			goto _out_return;
+		}
+		if (unlikely(oldlen != len)) {
+			BRICK_ERR("line %d memory corruption %p: len != oldlen (%d != %d, previous line = %d))\n",
+				  cline,
+				  data,
+				  len,
+				  oldlen,
+				  prev_line);
+			goto _out_return;
+		}
+		magic2 = INT_ACCESS(data, len);
+		if (unlikely(magic2 != MAGIC_BEND)) {
+			/* report the end magic actually found, not the start magic */
+			BRICK_ERR("line %d memory corruption %p: magix %08x != %08x (previous line = %d)\n",
+				  cline,
+				  data,
+				  magic2,
+				  MAGIC_BEND,
+				  prev_line);
+			goto _out_return;
+		}
+		/* invalidate the magics so that a second free is reported */
+		INT_ACCESS(test, 0) = 0xffffffff;
+		INT_ACCESS(data, len) = 0xffffffff;
+		/* continue with the real start of the block */
+		data = test;
+		atomic_dec(&block_count[line]);
+		atomic_inc(&block_free[line]);
+	} else if (order == 1) {
+		/* header lives in the second page */
+		void *test = data + PAGE_SIZE;
+		int magic = INT_ACCESS(test, 0 * sizeof(int));
+		int line = INT_ACCESS(test, 1 * sizeof(int));
+		int oldlen = INT_ACCESS(test, 2 * sizeof(int));
+
+		if (unlikely(magic != MAGIC_BLOCK)) {
+			BRICK_ERR("line %d memory corruption %p: magix %08x != %08x (previous line = %d)\n",
+				  cline,
+				  data,
+				  magic,
+				  MAGIC_BLOCK,
+				  prev_line);
+			goto _out_return;
+		}
+		if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+			BRICK_ERR("line %d memory corruption %p: alloc line = %d (previous line = %d)\n",
+				  cline,
+				  data,
+				  line,
+				  prev_line);
+			goto _out_return;
+		}
+		if (unlikely(oldlen != len)) {
+			BRICK_ERR("line %d memory corruption %p: len != oldlen (%d != %d, previous line = %d))\n",
+				  cline,
+				  data,
+				  len,
+				  oldlen,
+				  prev_line);
+			goto _out_return;
+		}
+		atomic_dec(&block_count[line]);
+		atomic_inc(&block_free[line]);
+	}
+#endif
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	if (order > 0 && brick_allow_freelist && atomic_read(&freelist_count[order]) <= brick_mem_freelist_max[order]) {
+		_put_free(data, order);
+	} else
+#endif
+		__brick_block_free(data, order, cline);
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	brick_mem_alloc_count[order] = atomic_dec_return(&_alloc_count[order]);
+#endif
+#ifdef BRICK_DEBUG_MEM
+_out_return:;
+#endif
+}
+
+/*
+ * Map a kernel address to its struct page.
+ *
+ * *offset receives the offset of data within its page.  *len is clamped so
+ * that the range does not extend beyond the end of that page.  Both
+ * vmalloc and directly mapped addresses are handled.
+ */
+struct page *brick_iomap(void *data, int *offset, int *len)
+{
+	int in_page = ((unsigned long)data) & (PAGE_SIZE-1);
+
+	*offset = in_page;
+
+	/* never cross the page boundary */
+	if (*len > PAGE_SIZE - in_page)
+		*len = PAGE_SIZE - in_page;
+
+	if (is_vmalloc_addr(data))
+		return vmalloc_to_page(data);
+	return virt_to_page(data);
+}
+
+/***********************************************************************/
+
+/* module */
+
+/*
+ * Log the allocation statistics gathered under BRICK_DEBUG_MEM (the
+ * function does nothing without it).
+ *
+ * Three sections are printed: block, memory and string allocations, each
+ * listing the source lines that still hold allocations.  When final is true
+ * and a section still has outstanding allocations, its summary line is
+ * logged as an error instead of as an info message.
+ */
+void brick_mem_statistics(bool final)
+{
+#ifdef BRICK_DEBUG_MEM
+	int idx;
+	int total = 0;
+	int sites = 0;
+
+	BRICK_INF("======== page allocation:\n");
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	for (idx = 0; idx <= BRICK_MAX_ORDER; idx++) {
+		BRICK_INF("pages order = %2d operations = %9d freelist_count = %4d / %3d raw_count = %5d alloc_count = %5d alloc_len = %5d line = %5d max_count = %5d\n",
+			  idx,
+			  atomic_read(&op_count[idx]),
+			  atomic_read(&freelist_count[idx]),
+			  brick_mem_freelist_max[idx],
+			  atomic_read(&raw_count[idx]),
+			  brick_mem_alloc_count[idx],
+			  alloc_len[idx],
+			  alloc_line[idx],
+			  brick_mem_alloc_max[idx]);
+	}
+#endif
+	/* block allocations per source line */
+	for (idx = 0; idx < BRICK_DEBUG_MEM; idx++) {
+		int pending = atomic_read(&block_count[idx]);
+
+		if (!pending)
+			continue;
+		total += pending;
+		sites++;
+		BRICK_INF("line %4d: %6d allocated (last size = %4d, freed = %6d)\n",
+			  idx,
+			  pending,
+			  block_len[idx],
+			  atomic_read(&block_free[idx]));
+	}
+	if (final && total) {
+		BRICK_ERR("======== %d block allocations in %d places (phys=%d)\n",
+			  total, sites, atomic_read(&phys_block_alloc));
+	} else {
+		BRICK_INF("======== %d block allocations in %d places (phys=%d)\n",
+			  total, sites, atomic_read(&phys_block_alloc));
+	}
+
+	/* small memory allocations per source line */
+	total = 0;
+	sites = 0;
+	for (idx = 0; idx < BRICK_DEBUG_MEM; idx++) {
+		int pending = atomic_read(&mem_count[idx]);
+
+		if (!pending)
+			continue;
+		total += pending;
+		sites++;
+		BRICK_INF("line %4d: %6d allocated (last size = %4d, freed = %6d)\n",
+			  idx,
+			  pending,
+			  mem_len[idx],
+			  atomic_read(&mem_free[idx]));
+	}
+	if (final && total) {
+		BRICK_ERR("======== %d memory allocations in %d places (phys=%d,redirect=%d)\n",
+			  total, sites,
+			  atomic_read(&phys_mem_alloc), atomic_read(&mem_redirect_alloc));
+	} else {
+		BRICK_INF("======== %d memory allocations in %d places (phys=%d,redirect=%d)\n",
+			  total, sites,
+			  atomic_read(&phys_mem_alloc), atomic_read(&mem_redirect_alloc));
+	}
+
+	/* string allocations per source line */
+	total = 0;
+	sites = 0;
+	for (idx = 0; idx < BRICK_DEBUG_MEM; idx++) {
+		int pending = atomic_read(&string_count[idx]);
+
+		if (!pending)
+			continue;
+		total += pending;
+		sites++;
+		BRICK_INF("line %4d: %6d allocated (freed = %6d)\n",
+			  idx,
+			  pending,
+			  atomic_read(&string_free[idx]));
+	}
+	if (final && total) {
+		BRICK_ERR("======== %d string allocations in %d places (phys=%d)\n",
+			  total, sites, atomic_read(&phys_string_alloc));
+	} else {
+		BRICK_INF("======== %d string allocations in %d places (phys=%d)\n",
+			  total, sites, atomic_read(&phys_string_alloc));
+	}
+#endif
+}
+
+/* module init stuff */
+
+/*
+ * Module initialisation: set up the freelist locks and, in strong debug
+ * mode, the bookkeeping hash lists with their locks, then determine the
+ * total amount of RAM.  Always returns 0.
+ */
+int __init init_brick_mem(void)
+{
+	int idx;
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	for (idx = 0; idx <= BRICK_MAX_ORDER; idx++)
+		spin_lock_init(&freelist_lock[idx]);
+#endif
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	for (idx = 0; idx < MAX_INFO_LISTS; idx++) {
+		INIT_LIST_HEAD(&inf_anchor[idx]);
+		rwlock_init(&inf_lock[idx]);
+	}
+#else
+	/* avoid an unused-variable warning */
+	(void)idx;
+#endif
+
+	get_total_ram();
+
+	return 0;
+}
+
+/*
+ * Module cleanup: release all blocks parked on the freelists (when
+ * preallocation is compiled in) and print the final statistics, which
+ * report allocations still outstanding as errors.
+ */
+void exit_brick_mem(void)
+{
+ BRICK_INF("deallocating memory...\n");
+#ifdef CONFIG_MARS_MEM_PREALLOC
+ _free_all();
+#endif
+
+ /* final == true: leftover allocations are logged via BRICK_ERR */
+ brick_mem_statistics(true);
+}
diff --git a/include/linux/brick/brick_mem.h b/include/linux/brick/brick_mem.h
new file mode 100644
index 0000000..1a2f236
--- /dev/null
+++ b/include/linux/brick/brick_mem.h
@@ -0,0 +1,218 @@
+/*
+ * MARS Long Distance Replication Software
+ *
+ * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
+ * Copyright (C) 2011-2014 1&1 Internet AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef BRICK_MEM_H
+#define BRICK_MEM_H
+
+#include <linux/mm_types.h>
+
+#define BRICK_DEBUG_MEM 4096
+
+#ifndef CONFIG_MARS_DEBUG_MEM
+#undef BRICK_DEBUG_MEM
+#endif
+#ifdef CONFIG_MARS_DEBUG_ORDER0
+#define BRICK_DEBUG_ORDER0
+#endif
+
+#define CONFIG_MARS_MEM_PREALLOC /* this is VITAL - disable only for experiments! */
+
+#define GFP_BRICK GFP_NOIO
+
+extern long long brick_global_memavail;
+extern long long brick_global_memlimit;
+extern atomic64_t brick_global_block_used;
+
+/* All brick memory allocations are guaranteed to succeed.
+ * In case of low memory, they will just retry (forever).
+ *
+ * We always prefer threads for concurrency.
+ * Therefore, in_interrupt() code does not occur, and we can
+ * always sleep in case of memory pressure.
+ *
+ * Resource deadlocks are avoided by the above memory limits.
+ * When exceeded, new memory is simply not allocated any more
+ * (except for vital memory, such as IO memory for which a
+ * low_mem_reserve must always exist, anyway).
+ */
+
+/***********************************************************************/
+
+/* compiler tweaking */
+
+/* Some functions are known to return non-null pointer values,
+ * at least under some Kconfig conditions.
+ *
+ * In code like...
+ *
+ * void *ptr = myfunction();
+ * if (unlikely(!ptr)) {
+ * printk("ERROR: this should not happen\n");
+ * goto fail;
+ * }
+ *
+ * ... the dead code elimination of gcc will not remove the if clause
+ * because the function might return a NULL value, even if a human
+ * would know that myfunction() does not return a NULL value.
+ *
+ * Unfortunately, the __attribute__((nonnull)) can only be applied
+ * to input parameters, but not to the return value.
+ *
+ * Even more unfortunately, a small inline wrapper does not help:
+ * when the wrapper is inlined away, its nonnull attribute appears
+ * to be dropped altogether.
+ * I don't know whether this is a bug or a feature (or just a weakness).
+ *
+ * Following is a small hack which solves the problem at least for gcc 4.7.
+ *
+ * In order to be useful, the -fdelete-null-pointer-checks must be set.
+ * Since BRICK is superuser-only anyway, enabling this for MARS should not
+ * be a security risk
+ * (c.f. upstream kernel commit a3ca86aea507904148870946d599e07a340b39bf)
+ */
+/*
+ * Return _ptr unchanged while making the compiler believe that it has been
+ * dereferenced, so that later NULL checks on the result can be removed by
+ * dead code elimination.  No memory access is generated: the asm statement
+ * is empty and only names *ptr as an input operand.
+ *
+ * Defined as static inline: a function body in a header must not depend on
+ * an out-of-line copy existing elsewhere (gnu89 "extern inline") and must
+ * not emit an external definition in every translation unit (C99
+ * "extern inline").
+ */
+static inline
+void *brick_mark_nonnull(void *_ptr)
+{
+	char *ptr = _ptr;
+
+	/* fool gcc to believe that the pointer were dereferenced... */
+	asm("" : : "X" (*ptr));
+	return ptr;
+}
+
+/***********************************************************************/
+
+/* small memory allocation (use this only for len < PAGE_SIZE) */
+
+/*
+ * Allocate _len_ bytes of small memory; the caller's source line is
+ * recorded via __LINE__.  The result is marked as non-NULL for the compiler
+ * and must be released with brick_mem_free().
+ */
+#define brick_mem_alloc(_len_) \
+ ({ \
+ void *_res_ = _brick_mem_alloc(_len_, __LINE__); \
+ brick_mark_nonnull(_res_); \
+ })
+
+/*
+ * Like brick_mem_alloc(), but the returned memory is zero-filled.
+ * _len_ is evaluated exactly once, so arguments with side effects are safe.
+ */
+#define brick_zmem_alloc(_len_) \
+	({ \
+		int _brick_zlen_ = (_len_); \
+		void *_res_ = _brick_mem_alloc(_brick_zlen_, __LINE__); \
+		_res_ = brick_mark_nonnull(_res_); \
+		memset(_res_, 0, _brick_zlen_); \
+		_res_; \
+	})
+
+/*
+ * Free memory obtained from brick_mem_alloc() / brick_zmem_alloc().
+ * A NULL pointer is silently ignored.  _data_ is evaluated exactly once.
+ */
+#define brick_mem_free(_data_) \
+	do { \
+		void *_brick_ptr_ = (_data_); \
+		\
+		if (_brick_ptr_) { \
+			_brick_mem_free(_brick_ptr_, __LINE__); \
+		} \
+	} while (0)
+
+/* don't use the following directly */
+extern void *_brick_mem_alloc(int len, int line) __attribute__((malloc)) __attribute__((alloc_size(1)));
+extern void _brick_mem_free(void *data, int line);
+
+/***********************************************************************/
+
+/* string memory allocation */
+
+#define BRICK_STRING_LEN 1024 /* default value when len == 0 */
+
+/*
+ * Allocate a string buffer of _len_ bytes (0 selects BRICK_STRING_LEN);
+ * the caller's source line is recorded via __LINE__.  The result is marked
+ * as non-NULL for the compiler and must be released with brick_string_free().
+ */
+#define brick_string_alloc(_len_) \
+ ({ \
+ char *_res_ = _brick_string_alloc((_len_), __LINE__); \
+ (char *)brick_mark_nonnull(_res_); \
+ })
+
+/*
+ * Duplicate at most _len_ characters of _orig_ into a newly allocated
+ * string of _len_ + 1 bytes.  The result is always NUL-terminated and must
+ * be released with brick_string_free().
+ * Both arguments are evaluated exactly once.
+ */
+#define brick_strndup(_orig_, _len_) \
+	({ \
+		int _brick_n_ = (_len_); \
+		char *_res_ = _brick_string_alloc(_brick_n_ + 1, __LINE__);\
+		_res_ = brick_mark_nonnull(_res_); \
+		strncpy(_res_, (_orig_), _brick_n_ + 1); \
+		/* always null-terminate for safety */ \
+		_res_[_brick_n_] = '\0'; \
+		(char *)brick_mark_nonnull(_res_); \
+	})
+
+/*
+ * Duplicate the NUL-terminated string _orig_ into a newly allocated string,
+ * which must be released with brick_string_free().
+ * _orig_ is evaluated exactly once.
+ */
+#define brick_strdup(_orig_) \
+	({ \
+		const char *_brick_src_ = (_orig_); \
+		int _len_ = strlen(_brick_src_); \
+		char *_res_ = _brick_string_alloc((_len_) + 1, __LINE__);\
+		_res_ = brick_mark_nonnull(_res_); \
+		/* _len_ + 1 bytes: the terminating NUL is copied as well */ \
+		strncpy(_res_, _brick_src_, (_len_) + 1); \
+		(char *)brick_mark_nonnull(_res_); \
+	})
+
+/*
+ * Free a string obtained from brick_string_alloc(), brick_strdup() or
+ * brick_strndup().  A NULL pointer is silently ignored.
+ * _data_ is evaluated exactly once.
+ */
+#define brick_string_free(_data_) \
+	do { \
+		const char *_brick_str_ = (_data_); \
+		\
+		if (_brick_str_) { \
+			_brick_string_free(_brick_str_, __LINE__); \
+		} \
+	} while (0)
+
+/* don't use the following directly */
+extern char *_brick_string_alloc(int len, int line) __attribute__((malloc));
+extern void _brick_string_free(const char *data, int line);
+
+/***********************************************************************/
+
+/* block memory allocation (for aligned multiples of 512 or of PAGE_SIZE, respectively) */
+
+/*
+ * Allocate a block of _len_ bytes; _pos_ is passed through to
+ * _brick_block_alloc() and the caller's source line is recorded via
+ * __LINE__.  The result is marked as non-NULL for the compiler and must be
+ * released with brick_block_free() using the same _len_.
+ */
+#define brick_block_alloc(_pos_, _len_) \
+ ({ \
+ void *_res_ = _brick_block_alloc((_pos_), (_len_), __LINE__);\
+ brick_mark_nonnull(_res_); \
+ })
+
+/*
+ * Free a block obtained from brick_block_alloc(); _len_ must be the length
+ * used for the allocation.  A NULL pointer is silently ignored.
+ * _data_ is evaluated exactly once.
+ */
+#define brick_block_free(_data_, _len_) \
+	do { \
+		void *_brick_blk_ = (_data_); \
+		\
+		if (_brick_blk_) { \
+			_brick_block_free(_brick_blk_, (_len_), __LINE__); \
+		} \
+	} while (0)
+
+extern struct page *brick_iomap(void *data, int *offset, int *len);
+
+/* don't use the following directly */
+extern void *_brick_block_alloc(loff_t pos, int len, int line) __attribute__((malloc)) __attribute__((alloc_size(2)));
+extern void _brick_block_free(void *data, int len, int cline);
+
+/***********************************************************************/
+
+/* reservations / preallocation */
+
+#define BRICK_MAX_ORDER 11
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+extern int brick_allow_freelist;
+
+extern int brick_pre_reserve[BRICK_MAX_ORDER+1];
+extern int brick_mem_freelist_max[BRICK_MAX_ORDER+1];
+extern int brick_mem_alloc_count[BRICK_MAX_ORDER+1];
+extern int brick_mem_alloc_max[BRICK_MAX_ORDER+1];
+
+extern int brick_mem_reserve(void);
+
+#endif
+
+extern void brick_mem_statistics(bool final);
+
+/***********************************************************************/
+
+/* init */
+
+extern int init_brick_mem(void);
+extern void exit_brick_mem(void);
+
+#endif
--
2.6.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/