[PATCH 02/22 -v7] Add basic support for gcc profiler instrumentation

From: Steven Rostedt
Date: Tue Jan 29 2008 - 22:19:51 EST


If CONFIG_MCOUNT is selected and /proc/sys/kernel/mcount_enabled is set to a
non-zero value the mcount routine will be called everytime we enter a kernel
function that is not marked with the "notrace" attribute.

The mcount routine will then call a registered function if a function
happens to be registered.

[This code has been highly hacked by Steven Rostedt, so don't
blame Arnaldo for all of this ;-) ]

Update:
It is now possible to register more than one mcount function.
If only one mcount function is registered, that will be the
function that mcount calls directly. If more than one function
is registered, then mcount will call a function that will loop
through the functions to call.

Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Steven Rostedt <srostedt@xxxxxxxxxx>
---
Makefile | 3
arch/x86/Kconfig | 1
arch/x86/kernel/entry_32.S | 25 +++++++
arch/x86/kernel/entry_64.S | 36 +++++++++++
include/linux/linkage.h | 2
include/linux/mcount.h | 38 ++++++++++++
kernel/sysctl.c | 11 +++
lib/Kconfig.debug | 1
lib/Makefile | 2
lib/tracing/Kconfig | 10 +++
lib/tracing/Makefile | 3
lib/tracing/mcount.c | 141 +++++++++++++++++++++++++++++++++++++++++++++
12 files changed, 273 insertions(+)

Index: linux-mcount.git/Makefile
===================================================================
--- linux-mcount.git.orig/Makefile 2008-01-29 17:01:56.000000000 -0500
+++ linux-mcount.git/Makefile 2008-01-29 17:26:17.000000000 -0500
@@ -509,6 +509,9 @@ endif

include $(srctree)/arch/$(SRCARCH)/Makefile

+ifdef CONFIG_MCOUNT
+KBUILD_CFLAGS += -pg
+endif
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
else
Index: linux-mcount.git/arch/x86/Kconfig
===================================================================
--- linux-mcount.git.orig/arch/x86/Kconfig 2008-01-29 16:59:15.000000000 -0500
+++ linux-mcount.git/arch/x86/Kconfig 2008-01-29 17:26:18.000000000 -0500
@@ -19,6 +19,7 @@ config X86_64
config X86
bool
default y
+ select HAVE_MCOUNT

config GENERIC_TIME
bool
Index: linux-mcount.git/arch/x86/kernel/entry_32.S
===================================================================
--- linux-mcount.git.orig/arch/x86/kernel/entry_32.S 2008-01-29 16:59:15.000000000 -0500
+++ linux-mcount.git/arch/x86/kernel/entry_32.S 2008-01-29 17:26:18.000000000 -0500
@@ -75,6 +75,31 @@ DF_MASK = 0x00000400
NT_MASK = 0x00004000
VM_MASK = 0x00020000

+#ifdef CONFIG_MCOUNT
+.globl mcount
+mcount:
+ /* unlikely(mcount_enabled) */
+ cmpl $0, mcount_enabled
+ jnz trace
+ ret
+
+trace:
+ /* taken from glibc */
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %edx
+ movl 0x4(%ebp), %eax
+
+ call *mcount_trace_function
+
+ popl %edx
+ popl %ecx
+ popl %eax
+
+ ret
+#endif
+
#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
Index: linux-mcount.git/arch/x86/kernel/entry_64.S
===================================================================
--- linux-mcount.git.orig/arch/x86/kernel/entry_64.S 2008-01-29 16:59:15.000000000 -0500
+++ linux-mcount.git/arch/x86/kernel/entry_64.S 2008-01-29 17:26:18.000000000 -0500
@@ -53,6 +53,42 @@

.code64

+#ifdef CONFIG_MCOUNT
+
+ENTRY(mcount)
+ /* unlikely(mcount_enabled) */
+ cmpl $0, mcount_enabled
+ jnz trace
+ retq
+
+trace:
+ /* taken from glibc */
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ movq 0x38(%rsp), %rsi
+ movq 8(%rbp), %rdi
+
+ call *mcount_trace_function
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+
+ retq
+#endif
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
Index: linux-mcount.git/include/linux/linkage.h
===================================================================
--- linux-mcount.git.orig/include/linux/linkage.h 2008-01-29 16:59:15.000000000 -0500
+++ linux-mcount.git/include/linux/linkage.h 2008-01-29 17:26:18.000000000 -0500
@@ -3,6 +3,8 @@

#include <asm/linkage.h>

+#define notrace __attribute__((no_instrument_function))
+
#ifdef __cplusplus
#define CPP_ASMLINKAGE extern "C"
#else
Index: linux-mcount.git/include/linux/mcount.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-mcount.git/include/linux/mcount.h 2008-01-29 17:26:18.000000000 -0500
@@ -0,0 +1,38 @@
+#ifndef _LINUX_MCOUNT_H
+#define _LINUX_MCOUNT_H
+
+#ifdef CONFIG_MCOUNT
+extern int mcount_enabled;
+
+#include <linux/linkage.h>
+
+#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
+#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
+
+typedef void (*mcount_func_t)(unsigned long ip, unsigned long parent_ip);
+
+struct mcount_ops {
+ mcount_func_t func;
+ struct mcount_ops *next;
+};
+
+/*
+ * The mcount_ops must be a static and should also
+ * be read_mostly. These functions do modify read_mostly variables
+ * so use them sparely. Never free an mcount_op or modify the
+ * next pointer after it has been registered. Even after unregistering
+ * it, the next pointer may still be used internally.
+ */
+int register_mcount_function(struct mcount_ops *ops);
+int unregister_mcount_function(struct mcount_ops *ops);
+void clear_mcount_function(void);
+
+extern void mcount(void);
+
+#else /* !CONFIG_MCOUNT */
+# define register_mcount_function(ops) do { } while (0)
+# define unregister_mcount_function(ops) do { } while (0)
+# define clear_mcount_function(ops) do { } while (0)
+#endif /* CONFIG_MCOUNT */
+#endif /* _LINUX_MCOUNT_H */
Index: linux-mcount.git/kernel/sysctl.c
===================================================================
--- linux-mcount.git.orig/kernel/sysctl.c 2008-01-29 17:02:10.000000000 -0500
+++ linux-mcount.git/kernel/sysctl.c 2008-01-29 17:26:18.000000000 -0500
@@ -46,6 +46,7 @@
#include <linux/nfs_fs.h>
#include <linux/acpi.h>
#include <linux/reboot.h>
+#include <linux/mcount.h>

#include <asm/uaccess.h>
#include <asm/processor.h>
@@ -514,6 +515,16 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+#ifdef CONFIG_MCOUNT
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "mcount_enabled",
+ .data = &mcount_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
#ifdef CONFIG_KMOD
{
.ctl_name = KERN_MODPROBE,
Index: linux-mcount.git/lib/Kconfig.debug
===================================================================
--- linux-mcount.git.orig/lib/Kconfig.debug 2008-01-29 17:02:10.000000000 -0500
+++ linux-mcount.git/lib/Kconfig.debug 2008-01-29 17:26:18.000000000 -0500
@@ -562,5 +562,6 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.

+source lib/tracing/Kconfig

source "samples/Kconfig"
Index: linux-mcount.git/lib/Makefile
===================================================================
--- linux-mcount.git.orig/lib/Makefile 2008-01-29 17:02:10.000000000 -0500
+++ linux-mcount.git/lib/Makefile 2008-01-29 17:26:18.000000000 -0500
@@ -67,6 +67,8 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o

+obj-$(CONFIG_MCOUNT) += tracing/
+
lib-$(CONFIG_GENERIC_BUG) += bug.o

hostprogs-y := gen_crc32table
Index: linux-mcount.git/lib/tracing/Kconfig
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-mcount.git/lib/tracing/Kconfig 2008-01-29 17:26:18.000000000 -0500
@@ -0,0 +1,10 @@
+
+# Archs that enable MCOUNT should select HAVE_MCOUNT
+config HAVE_MCOUNT
+ bool
+
+# MCOUNT itself is useless, or will just be added overhead.
+# It needs something to register a function with it.
+config MCOUNT
+ bool
+ select FRAME_POINTER
Index: linux-mcount.git/lib/tracing/Makefile
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-mcount.git/lib/tracing/Makefile 2008-01-29 17:26:18.000000000 -0500
@@ -0,0 +1,3 @@
+obj-$(CONFIG_MCOUNT) += libmcount.o
+
+libmcount-y := mcount.o
Index: linux-mcount.git/lib/tracing/mcount.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-mcount.git/lib/tracing/mcount.c 2008-01-29 17:26:18.000000000 -0500
@@ -0,0 +1,141 @@
+/*
+ * Infrastructure for profiling code inserted by 'gcc -pg'.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@xxxxxxxxxx>
+ *
+ * Originally ported from the -rt patch by:
+ * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
+ *
+ * Based on code in the latency_tracer, that is:
+ *
+ * Copyright (C) 2004-2006 Ingo Molnar
+ * Copyright (C) 2004 William Lee Irwin III
+ */
+
+#include <linux/module.h>
+#include <linux/mcount.h>
+
+/*
+ * Since we have nothing protecting between the test of
+ * mcount_trace_function and the call to it, we can't
+ * set it to NULL without risking a race that will have
+ * the kernel call the NULL pointer. Instead, we just
+ * set the function pointer to a dummy function.
+ */
+notrace void dummy_mcount_tracer(unsigned long ip,
+ unsigned long parent_ip)
+{
+ /* do nothing */
+}
+
+static DEFINE_SPINLOCK(mcount_func_lock);
+static struct mcount_ops mcount_list_end __read_mostly =
+{
+ .func = dummy_mcount_tracer,
+};
+
+static struct mcount_ops *mcount_list __read_mostly = &mcount_list_end;
+mcount_func_t mcount_trace_function __read_mostly = dummy_mcount_tracer;
+int mcount_enabled __read_mostly;
+
+/* mcount is defined per arch in assembly */
+EXPORT_SYMBOL_GPL(mcount);
+
+notrace void mcount_list_func(unsigned long ip, unsigned long parent_ip)
+{
+ struct mcount_ops *op = mcount_list;
+
+ while (op != &mcount_list_end) {
+ op->func(ip, parent_ip);
+ op = op->next;
+ };
+}
+
+/**
+ * register_mcount_function - register a function for profiling
+ * @ops - ops structure that holds the function for profiling.
+ *
+ * Register a function to be called by all functions in the
+ * kernel.
+ *
+ * Note: @ops->func and all the functions it calls must be labeled
+ * with "notrace", otherwise it will go into a
+ * recursive loop.
+ */
+int register_mcount_function(struct mcount_ops *ops)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&mcount_func_lock, flags);
+ ops->next = mcount_list;
+ /* must have next seen before we update the list pointer */
+ smp_wmb();
+ mcount_list = ops;
+ /*
+ * For one func, simply call it directly.
+ * For more than one func, call the chain.
+ */
+ if (ops->next == &mcount_list_end)
+ mcount_trace_function = ops->func;
+ else
+ mcount_trace_function = mcount_list_func;
+ spin_unlock_irqrestore(&mcount_func_lock, flags);
+
+ return 0;
+}
+
+/**
+ * unregister_mcount_function - unresgister a function for profiling.
+ * @ops - ops structure that holds the function to unregister
+ *
+ * Unregister a function that was added to be called by mcount profiling.
+ */
+int unregister_mcount_function(struct mcount_ops *ops)
+{
+ unsigned long flags;
+ struct mcount_ops **p;
+ int ret = 0;
+
+ spin_lock_irqsave(&mcount_func_lock, flags);
+
+ /*
+ * If we are the only function, then the mcount pointer is
+ * pointing directly to that function.
+ */
+ if (mcount_list == ops && ops->next == &mcount_list_end) {
+ mcount_trace_function = dummy_mcount_tracer;
+ mcount_list = &mcount_list_end;
+ goto out;
+ }
+
+ for (p = &mcount_list; *p != &mcount_list_end; p = &(*p)->next)
+ if (*p == ops)
+ break;
+
+ if (*p != ops) {
+ ret = -1;
+ goto out;
+ }
+
+ *p = (*p)->next;
+
+ /* If we only have one func left, then call that directly */
+ if (mcount_list->next == &mcount_list_end)
+ mcount_trace_function = mcount_list->func;
+
+ out:
+ spin_unlock_irqrestore(&mcount_func_lock, flags);
+
+ return 0;
+}
+
+/**
+ * clear_mcount_function - reset the mcount function
+ *
+ * This NULLs the mcount function and in essence stops
+ * tracing. There may be lag
+ */
+void clear_mcount_function(void)
+{
+ mcount_trace_function = dummy_mcount_tracer;
+}

--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/