[PATCH 09/15] tracing: Add kmalloc/kfree macros

From: Tom Zanussi
Date: Mon Mar 02 2015 - 11:01:51 EST


Make the kmalloc() function in slab.h be generated by a macro, and use
that macro to define a normal and a _notrace version.

The _notrace version is for tracing code that needs to use kmalloc
internally while at the same time keeping kmalloc and friends traceable.
Examples would be the bpf map and hist triggers code.
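
For instance, a hist trigger implementation could allocate its internal
bookkeeping with the _notrace variants, so that its own allocations stay
out of the kmalloc event stream it is recording (a hypothetical sketch;
struct hist_elt is illustrative only):

	struct hist_elt *elt;

	/* internal allocation, invisible to the kmalloc tracepoint */
	elt = kmalloc_notrace(sizeof(*elt), GFP_KERNEL);
	if (!elt)
		return -ENOMEM;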

The reason for doing this as a macro is that we can avoid any change
at all to the normal kmalloc(), since its performance is obviously
critical. The macro allows us to define a _notrace version that reuses
the kmalloc code but additionally sets the ___GFP_NOTRACE flag. Any
downstream call to a tracepoint function is then avoided, because the
DEFINE_EVENT_CONDITION() TP_CONDITION simply causes the trace call to
exit when it sees ___GFP_NOTRACE.
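
For reference, the conditioned kmalloc event would look something like
this (a simplified sketch based on the existing kmem_alloc event class,
not the exact definition from this series):

	DEFINE_EVENT_CONDITION(kmem_alloc, kmalloc,

		TP_PROTO(unsigned long call_site, const void *ptr,
			 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),

		TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),

		TP_CONDITION(!(gfp_flags & ___GFP_NOTRACE))
	);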

Because the C preprocessor doesn't allow conditionals such as #ifdef
CONFIG_SLOB inside a macro body, the #ifdef in the original code was
explicitly broken out, by defining a separate version of the macro for
each of the two cases.

Though kfree() doesn't suffer from the recursion problems that
motivate the kmalloc macro, we need to define a _notrace version of it
as well, in order to allow for proper accounting: users of
kmalloc_notrace() should use kfree_notrace() to make sure the kfrees
corresponding to the untraced kmallocs don't appear in the trace
stream, as in the sketch below.
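
A minimal sketch of the intended pairing (the buffer and size here are
illustrative only):

	void *buf = kmalloc_notrace(size, GFP_KERNEL); /* no kmalloc event */

	if (buf)
		kfree_notrace(buf); /* no kfree event either */

Freeing the same buffer with plain kfree() would instead put a kfree
event with no matching kmalloc event into the trace stream.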

Signed-off-by: Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx>
---
include/linux/slab.h | 61 ++++++++++++++++++++++++++++++++++++++--------------
mm/slab.c            | 45 ++++++++++++++++++++++----------------
mm/slob.c            | 45 ++++++++++++++++++++++----------------
mm/slub.c            | 47 +++++++++++++++++++++++-----------------
4 files changed, 124 insertions(+), 74 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9a139b6..7519aaa 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -142,6 +142,7 @@ void kmem_cache_free(struct kmem_cache *, void *);
void * __must_check __krealloc(const void *, size_t, gfp_t);
void * __must_check krealloc(const void *, size_t, gfp_t);
void kfree(const void *);
+void kfree_notrace(const void *);
void kzfree(const void *);
size_t ksize(const void *);

@@ -409,25 +410,53 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
* for general use, and so are not documented here. For a full list of
* potential flags, always refer to linux/gfp.h.
*/
-static __always_inline void *kmalloc(size_t size, gfp_t flags)
-{
- if (__builtin_constant_p(size)) {
- if (size > KMALLOC_MAX_CACHE_SIZE)
- return kmalloc_large(size, flags);
-#ifndef CONFIG_SLOB
- if (!(flags & GFP_DMA)) {
- int index = kmalloc_index(size);

- if (!index)
- return ZERO_SIZE_PTR;
+#define set_gfp_notrace_flag(flags)
+#define set_gfp_notrace_flag_notrace(flags) (flags |= ___GFP_NOTRACE)

- return kmem_cache_alloc_trace(kmalloc_caches[index],
- flags, size);
- }
-#endif
- }
- return __kmalloc(size, flags);
+#ifndef CONFIG_SLOB
+#define DEFINE_KMALLOC(_suffix) \
+static __always_inline void *kmalloc##_suffix(size_t size, gfp_t flags) \
+{ \
+ set_gfp_notrace_flag##_suffix(flags); \
+ \
+ if (__builtin_constant_p(size)) { \
+ if (size > KMALLOC_MAX_CACHE_SIZE) \
+ return kmalloc_large(size, flags); \
+ \
+ if (!(flags & GFP_DMA)) { \
+ int index = kmalloc_index(size); \
+ \
+ if (!index) \
+ return ZERO_SIZE_PTR; \
+ \
+ return kmem_cache_alloc_trace(kmalloc_caches[index],\
+ flags, size); \
+ } \
+ } \
+ return __kmalloc(size, flags); \
}
+#else
+#define DEFINE_KMALLOC(_suffix) \
+static __always_inline void *kmalloc##_suffix(size_t size, gfp_t flags) \
+{ \
+ set_gfp_notrace_flag##_suffix(flags); \
+ \
+ if (__builtin_constant_p(size)) { \
+ if (size > KMALLOC_MAX_CACHE_SIZE) \
+ return kmalloc_large(size, flags); \
+ } \
+ return __kmalloc(size, flags); \
+}
+#endif /* !CONFIG_SLOB */
+
+DEFINE_KMALLOC()
+#ifdef CONFIG_TRACING
+DEFINE_KMALLOC(_notrace)
+#else
+#define kmalloc_notrace kmalloc
+#define kfree_notrace kfree
+#endif

/*
* Determine size used for the nth kmalloc cache.
diff --git a/mm/slab.c b/mm/slab.c
index 65b5dcb..c51c96a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3553,25 +3553,32 @@ EXPORT_SYMBOL(kmem_cache_free);
* Don't free memory not originally allocated by kmalloc()
* or you will run into trouble.
*/
-void kfree(const void *objp)
-{
- struct kmem_cache *c;
- unsigned long flags;
-
- trace_kfree(_RET_IP_, objp);
-
- if (unlikely(ZERO_OR_NULL_PTR(objp)))
- return;
- local_irq_save(flags);
- kfree_debugcheck(objp);
- c = virt_to_cache(objp);
- debug_check_no_locks_freed(objp, c->object_size);
-
- debug_check_no_obj_freed(objp, c->object_size);
- __cache_free(c, (void *)objp, _RET_IP_);
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(kfree);
+#define trace_kfree_notrace
+#define DEFINE_KFREE(_suffix) \
+void kfree##_suffix(const void *objp) \
+{ \
+ struct kmem_cache *c; \
+ unsigned long flags; \
+ \
+ trace_kfree##_suffix(_RET_IP_, objp); \
+ \
+ if (unlikely(ZERO_OR_NULL_PTR(objp))) \
+ return; \
+ local_irq_save(flags); \
+ kfree_debugcheck(objp); \
+ c = virt_to_cache(objp); \
+ debug_check_no_locks_freed(objp, c->object_size); \
+ \
+ debug_check_no_obj_freed(objp, c->object_size); \
+ __cache_free(c, (void *)objp, _RET_IP_); \
+ local_irq_restore(flags); \
+} \
+EXPORT_SYMBOL(kfree##_suffix);
+
+DEFINE_KFREE()
+#ifdef CONFIG_TRACING
+DEFINE_KFREE(_notrace)
+#endif

/*
* This initializes kmem_cache_node or resizes various caches for all nodes.
diff --git a/mm/slob.c b/mm/slob.c
index 96a8620..b3d37c4 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -481,25 +481,32 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfp,
}
#endif

-void kfree(const void *block)
-{
- struct page *sp;
-
- trace_kfree(_RET_IP_, block);
-
- if (unlikely(ZERO_OR_NULL_PTR(block)))
- return;
- kmemleak_free(block);
-
- sp = virt_to_page(block);
- if (PageSlab(sp)) {
- int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
- unsigned int *m = (unsigned int *)(block - align);
- slob_free(m, *m + align);
- } else
- __free_pages(sp, compound_order(sp));
-}
-EXPORT_SYMBOL(kfree);
+#define trace_kfree_notrace
+#define DEFINE_KFREE(_suffix) \
+void kfree##_suffix(const void *block) \
+{ \
+ struct page *sp; \
+ \
+ trace_kfree##_suffix(_RET_IP_, block); \
+ \
+ if (unlikely(ZERO_OR_NULL_PTR(block))) \
+ return; \
+ kmemleak_free(block); \
+ \
+ sp = virt_to_page(block); \
+ if (PageSlab(sp)) { \
+ int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); \
+ unsigned int *m = (unsigned int *)(block - align); \
+ slob_free(m, *m + align); \
+ } else \
+ __free_pages(sp, compound_order(sp)); \
+} \
+EXPORT_SYMBOL(kfree##_suffix);
+
+DEFINE_KFREE()
+#ifdef CONFIG_TRACING
+DEFINE_KFREE(_notrace)
+#endif

/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t ksize(const void *block)
diff --git a/mm/slub.c b/mm/slub.c
index fe376fe..93d4442 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3326,26 +3326,33 @@ size_t ksize(const void *object)
}
EXPORT_SYMBOL(ksize);

-void kfree(const void *x)
-{
- struct page *page;
- void *object = (void *)x;
-
- trace_kfree(_RET_IP_, x);
-
- if (unlikely(ZERO_OR_NULL_PTR(x)))
- return;
-
- page = virt_to_head_page(x);
- if (unlikely(!PageSlab(page))) {
- BUG_ON(!PageCompound(page));
- kfree_hook(x);
- __free_kmem_pages(page, compound_order(page));
- return;
- }
- slab_free(page->slab_cache, page, object, _RET_IP_);
-}
-EXPORT_SYMBOL(kfree);
+#define trace_kfree_notrace
+#define DEFINE_KFREE(_suffix) \
+void kfree##_suffix(const void *x) \
+{ \
+ struct page *page; \
+ void *object = (void *)x; \
+ \
+ trace_kfree##_suffix(_RET_IP_, x); \
+ \
+ if (unlikely(ZERO_OR_NULL_PTR(x))) \
+ return; \
+ \
+ page = virt_to_head_page(x); \
+ if (unlikely(!PageSlab(page))) { \
+ BUG_ON(!PageCompound(page)); \
+ kfree_hook(x); \
+ __free_kmem_pages(page, compound_order(page)); \
+ return; \
+ } \
+ slab_free(page->slab_cache, page, object, _RET_IP_); \
+} \
+EXPORT_SYMBOL(kfree##_suffix);
+
+DEFINE_KFREE()
+#ifdef CONFIG_TRACING
+DEFINE_KFREE(_notrace)
+#endif

/*
* kmem_cache_shrink removes empty slabs from the partial lists and sorts
--
1.9.3
