[patch 2/4] This patch enhances OProfile to support System zs hardware sampling feature

From: graalfs
Date: Mon Dec 20 2010 - 08:06:41 EST


From: graalfs@xxxxxxxxxxxxxxxxxx

OProfile is enhanced to export all files for controlling System z's hardware sampling,
and to invoke hwsampler exported functions to initialize and use System z's hardware sampling.

The patch invokes hwsampler_setup() during oprofile init and exports following
hwsampler files under oprofilefs if hwsampler's setup succeeded:

A new directory for hardware sampling based files

/dev/oprofile/hwsampling/

The userland daemon must explicitly write to the following files
to disable (or enable) hardware based sampling

/dev/oprofile/hwsampling/hwsampler

to modify the actual sampling rate

/dev/oprofile/hwsampling/hw_interval

to modify the amount of sampling memory (measured in 4K pages)

/dev/oprofile/hwsampling/hw_sdbt_blocks

The following files are read only and show
the possible minimum sampling rate

/dev/oprofile/hwsampling/hw_min_interval

the possible maximum sampling rate

/dev/oprofile/hwsampling/hw_max_interval

The patch splits the oprofile_timer_[init/exit] function so that it can be also called
through user context (oprofilefs) to avoid kernel oops.

Signed-off-by: Mahesh Salgaonkar <mahesh@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Maran Pakkirisamy <maranp@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Heinz Graalfs <graalfs@xxxxxxxxxxxxxxxxxx>
---
arch/s390/oprofile/Makefile | 1
arch/s390/oprofile/hwsampler_files.c | 120 +++++++++++++++++++++++++++++++++++
arch/s390/oprofile/init.c | 35 ++++++++++
drivers/oprofile/oprof.c | 37 ++++++++++
drivers/oprofile/oprof.h | 2
drivers/oprofile/timer_int.c | 16 +++-
include/linux/oprofile.h | 15 ++++
7 files changed, 223 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/s390/oprofile/Makefile
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/Makefile
+++ linux-2.6/arch/s390/oprofile/Makefile
@@ -8,6 +8,7 @@ DRIVER_OBJS = $(addprefix ../../../drive
timer_int.o )

oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-y += $(if $(CONFIG_HWSAMPLER), hwsampler_files.o,)

HW_SAMPLER_DRIVER_OBJS = $(addprefix ../../../drivers/s390/hwsampler/, \
hwsampler-main.o smpctl.o )
Index: linux-2.6/arch/s390/oprofile/hwsampler_files.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/s390/oprofile/hwsampler_files.c
@@ -0,0 +1,120 @@
+/**
+ * arch/s390/oprofile/hwsampler_files.c
+ *
+ * Copyright IBM Corp. 2010
+ * Author: Mahesh Salgaonkar (mahesh@xxxxxxxxxxxxxxxxxx)
+ */
+#include <linux/oprofile.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+
+#include <asm/hwsampler.h>
+
+#define DEFAULT_INTERVAL 4096
+
+#define DEFAULT_SDBT_BLOCKS 1
+#define DEFAULT_SDB_BLOCKS 511
+
+static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
+unsigned long oprofile_min_interval;
+unsigned long oprofile_max_interval;
+
+static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
+static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
+
+static unsigned long oprofile_hwsampler;
+
+static int oprofile_hwsampler_start(void)
+{
+ int retval;
+
+ printk(KERN_INFO "oprofile_hwsampler_start\n");
+
+ retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+ if (retval)
+ return retval;
+
+ retval = hwsampler_start_all(oprofile_hw_interval);
+
+ return retval;
+}
+
+static void oprofile_hwsampler_stop(void)
+{
+ printk(KERN_INFO "oprofile_hwsampler_stop\n");
+
+ hwsampler_stop_all();
+ hwsampler_deallocate();
+ return;
+}
+
+int oprofile_arch_set_hwsampler(struct oprofile_operations *ops)
+{
+ printk(KERN_INFO "oprofile: using hardware sampling\n");
+ ops->start = oprofile_hwsampler_start;
+ ops->stop = oprofile_hwsampler_stop;
+ ops->cpu_type = "timer";
+
+ return 0;
+}
+
+static ssize_t hwsampler_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(oprofile_hwsampler, buf, count, offset);
+}
+
+static ssize_t hwsampler_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (oprofile_hwsampler == val)
+ return -EINVAL;
+
+ retval = oprofile_set_hwsampler(val);
+
+ if (retval)
+ return retval;
+
+ oprofile_hwsampler = val;
+ return count;
+}
+
+static const struct file_operations hwsampler_fops = {
+ .read = hwsampler_read,
+ .write = hwsampler_write,
+};
+
+int oprofile_create_hwsampling_files(struct super_block *sb,
+ struct dentry *root)
+{
+ struct dentry *hw_dir;
+
+ /* reinitialize default values */
+ oprofile_hwsampler = 1;
+
+ hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
+ if (!hw_dir)
+ return -EINVAL;
+
+ oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
+ oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
+ &oprofile_hw_interval);
+ oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
+ &oprofile_min_interval);
+ oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
+ &oprofile_max_interval);
+ oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
+ &oprofile_sdbt_blocks);
+
+ return 0;
+}
Index: linux-2.6/drivers/oprofile/oprof.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/oprof.c
+++ linux-2.6/drivers/oprofile/oprof.c
@@ -239,10 +239,43 @@ int oprofile_set_ulong(unsigned long *ad
return err;
}

+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+int oprofile_set_hwsampler(unsigned long val)
+{
+ int err = 0;
+
+ mutex_lock(&start_mutex);
+
+ if (oprofile_started) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ switch (val) {
+ case 1:
+ /* Switch to hardware sampling. */
+ __oprofile_timer_exit();
+ err = oprofile_arch_set_hwsampler(&oprofile_ops);
+ break;
+ case 0:
+ printk(KERN_INFO "oprofile: using timer interrupt.\n");
+ err = __oprofile_timer_init(&oprofile_ops);
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+out:
+ mutex_unlock(&start_mutex);
+ return err;
+}
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
+
static int __init oprofile_init(void)
{
int err;

+ memset(&oprofile_ops, 0, sizeof(oprofile_ops));
err = oprofile_arch_init(&oprofile_ops);
if (err < 0 || timer) {
printk(KERN_INFO "oprofile: using timer interrupt.\n");
@@ -250,6 +283,10 @@ static int __init oprofile_init(void)
if (err)
return err;
}
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ else if (err == 0)
+ oprofile_arch_set_hwsampler(&oprofile_ops);
+#endif
return oprofilefs_register();
}

Index: linux-2.6/drivers/oprofile/oprof.h
===================================================================
--- linux-2.6.orig/drivers/oprofile/oprof.h
+++ linux-2.6/drivers/oprofile/oprof.h
@@ -35,7 +35,9 @@ struct dentry;

void oprofile_create_files(struct super_block *sb, struct dentry *root);
int oprofile_timer_init(struct oprofile_operations *ops);
+int __oprofile_timer_init(struct oprofile_operations *ops);
void oprofile_timer_exit(void);
+void __oprofile_timer_exit(void);

int oprofile_set_ulong(unsigned long *addr, unsigned long val);
int oprofile_set_timeout(unsigned long time);
Index: linux-2.6/drivers/oprofile/timer_int.c
===================================================================
--- linux-2.6.orig/drivers/oprofile/timer_int.c
+++ linux-2.6/drivers/oprofile/timer_int.c
@@ -97,14 +97,13 @@ static struct notifier_block __refdata o
.notifier_call = oprofile_cpu_notify,
};

-int __init oprofile_timer_init(struct oprofile_operations *ops)
+int __oprofile_timer_init(struct oprofile_operations *ops)
{
int rc;

rc = register_hotcpu_notifier(&oprofile_cpu_notifier);
if (rc)
return rc;
- ops->create_files = NULL;
ops->setup = NULL;
ops->shutdown = NULL;
ops->start = oprofile_hrtimer_start;
@@ -113,7 +112,18 @@ int __init oprofile_timer_init(struct op
return 0;
}

-void __exit oprofile_timer_exit(void)
+int __init oprofile_timer_init(struct oprofile_operations *ops)
+{
+ return __oprofile_timer_init(ops);
+}
+
+void __oprofile_timer_exit(void)
{
unregister_hotcpu_notifier(&oprofile_cpu_notifier);
}
+
+void __exit oprofile_timer_exit(void)
+{
+ __oprofile_timer_exit();
+}
+
Index: linux-2.6/include/linux/oprofile.h
===================================================================
--- linux-2.6.orig/include/linux/oprofile.h
+++ linux-2.6/include/linux/oprofile.h
@@ -89,6 +89,21 @@ int oprofile_arch_init(struct oprofile_o
*/
void oprofile_arch_exit(void);

+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+/**
+ * setup hardware sampler for oprofiling.
+ */
+
+int oprofile_set_hwsampler(unsigned long);
+
+/**
+ * hardware sampler module initialization for the s390 arch
+ */
+
+int oprofile_arch_set_hwsampler(struct oprofile_operations *ops);
+
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
+
/**
* Add a sample. This may be called from any context.
*/
Index: linux-2.6/arch/s390/oprofile/init.c
===================================================================
--- linux-2.6.orig/arch/s390/oprofile/init.c
+++ linux-2.6/arch/s390/oprofile/init.c
@@ -11,16 +11,51 @@
#include <linux/oprofile.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/fs.h>

+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+#include <asm/hwsampler.h>
+
+extern int oprofile_create_hwsampling_files(struct super_block *sb,
+ struct dentry *root);
+
+extern unsigned long oprofile_min_interval;
+extern unsigned long oprofile_max_interval;
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */

extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);

int __init oprofile_arch_init(struct oprofile_operations* ops)
{
ops->backtrace = s390_backtrace;
+
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ if (hwsampler_setup())
+ return -ENODEV;
+
+ /*
+ * create hwsampler files only if hwsampler_setup() succeeds.
+ */
+ ops->create_files = oprofile_create_hwsampling_files;
+ oprofile_min_interval = hwsampler_query_min_interval();
+ if (oprofile_min_interval < 0) {
+ oprofile_min_interval = 0;
+ return -ENODEV;
+ }
+ oprofile_max_interval = hwsampler_query_max_interval();
+ if (oprofile_max_interval < 0) {
+ oprofile_max_interval = 0;
+ return -ENODEV;
+ }
+ return 0;
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
+
return -ENODEV;
}

void oprofile_arch_exit(void)
{
+#ifdef CONFIG_OPROFILE_HWSAMPLING_MODE
+ hwsampler_shutdown();
+#endif /* CONFIG_OPROFILE_HWSAMPLING_MODE */
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/