[PATCH] perf: Introduce extended syscall error reporting

From: Alexander Shishkin
Date: Fri Jul 03 2015 - 10:12:54 EST


It has been pointed several times out that perf syscall error reporting
leaves a lot to be desired [1].

This patch introduces a fairly simple extension that allows call sites
to annotate their error codes with arbitrary strings, which will then
be copied to userspace (if they asked for it) along with the module
name that produced the error message in JSON format. This way, we can
provide both human-readable and machine-parsable information to user and
leave room for extensions in the future.

[1] http://marc.info/?l=linux-kernel&m=141470811013082

Signed-off-by: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
---
include/linux/perf_event.h | 38 ++++++++++++++++++++++++
include/uapi/linux/perf_event.h | 8 ++++-
kernel/events/core.c | 66 +++++++++++++++++++++++++++++++++++++++--
3 files changed, 108 insertions(+), 4 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1b82d44b0a..15bbef478f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -56,6 +56,37 @@ struct perf_guest_info_callbacks {
#include <linux/cgroup.h>
#include <asm/local.h>

+#ifndef PERF_MODNAME
+#define PERF_MODNAME KBUILD_MODNAME
+#endif
+
+/*
+ * Extended error reporting: annotate an error code with a string
+ * and a module name to help users diagnase problems with their
+ * attributes and whatnot.
+ */
+struct perf_err_site {
+ const char *message;
+ const char *owner;
+ const int code;
+};
+
+#ifdef CONFIG_PERF_EVENTS
+
+#define __perf_err(__e, __c, __m) ({ \
+ static struct perf_err_site __err_site = { \
+ .message = (__m), \
+ .owner = PERF_MODNAME, \
+ .code = (__c), \
+ }; \
+ (__e) = &__err_site; \
+ (__c); \
+})
+
+#define perf_err(__evt, __c, __m) ({ __perf_err((__evt)->error, (__c), (__m)); })
+
+#endif
+
struct perf_callchain_entry {
__u64 nr;
__u64 ip[PERF_MAX_STACK_DEPTH];
@@ -447,6 +478,13 @@ struct perf_event {
struct list_head owner_entry;
struct task_struct *owner;

+ /*
+ * Extended error reporting
+ */
+ const struct perf_err_site *error;
+ void __user *error_buffer;
+ size_t error_buffer_size;
+
/* mmap bits */
struct mutex mmap_mutex;
atomic_t mmap_count;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d97f84c080..d1ae1a079c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -264,6 +264,7 @@ enum perf_event_read_format {
/* add: sample_stack_user */
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6 120 /* add: perf_err */

/*
* Hardware event_id to monitor via a performance monitoring event:
@@ -374,7 +375,12 @@ struct perf_event_attr {
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
- __u32 __reserved_2; /* align to __u64 */
+
+ /*
+ * Extended error reporting buffer
+ */
+ __u32 perf_err_size;
+ __u64 perf_err;
};

#define perf_flags(attr) (*(&(attr)->read_format + 1))
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8e13f3e54e..fd345c96d1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -49,6 +49,60 @@

#include <asm/irq_regs.h>

+static bool extended_reporting_enabled(struct perf_event_attr *attr)
+{
+ if (attr->size >= PERF_ATTR_SIZE_VER6 &&
+ attr->perf_err_size > 0)
+ return true;
+
+ return false;
+}
+
+/*
+ * Provide a JSON formatted error report to the user if they asked for it.
+ */
+static void __perf_error_report(struct perf_event_attr *attr,
+ const struct perf_err_site *err_site)
+{
+ void *buffer;
+
+ if (!err_site || !extended_reporting_enabled(attr))
+ return;
+
+ buffer = kasprintf(GFP_KERNEL,
+ "{\n"
+ "\t\"code\": %d,\n"
+ "\t\"module\": \"%s\",\n"
+ "\t\"message\": \"%s\"\n"
+ "}\n",
+ err_site->code, err_site->owner, err_site->message);
+ if (!buffer)
+ return;
+
+ (void)copy_to_user((void __user *)attr->perf_err, buffer,
+ attr->perf_err_size);
+ kfree(buffer);
+}
+
+/*
+ * Synchronous version of perf_err(), for the paths where we don't have
+ * an event.
+ */
+#define perf_err_attr(__attr, __c, __m) ({ \
+ struct perf_err_site *__site; \
+ __perf_err(__site, (__c), (__m)); \
+ __perf_error_report(__attr, __site); \
+ (__c); \
+})
+
+/*
+ * Report an error before returning from a syscall
+ */
+static void perf_error_report(struct perf_event *event)
+{
+ __perf_error_report(&event->attr, event->error);
+}
+
static struct workqueue_struct *perf_wq;

typedef int (*remote_function_f)(void *);
@@ -3594,6 +3648,8 @@ static void _free_event(struct perf_event *event)
*/
static void free_event(struct perf_event *event)
{
+ perf_error_report(event);
+
if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
"unexpected event refcount: %ld; ptr=%p\n",
atomic_long_read(&event->refcount), event)) {
@@ -3661,6 +3717,8 @@ static void put_event(struct perf_event *event)
if (!atomic_long_dec_and_test(&event->refcount))
return;

+ perf_error_report(event);
+
if (!is_kernel_event(event))
perf_remove_from_owner(event);

@@ -7634,6 +7692,7 @@ err_ns:
perf_detach_cgroup(event);
if (event->ns)
put_pid_ns(event->ns);
+ perf_error_report(event);
kfree(event);

return ERR_PTR(err);
@@ -7912,15 +7971,16 @@ SYSCALL_DEFINE5(perf_event_open,

if (!attr.exclude_kernel) {
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return perf_err_attr(&attr, -EACCES,
+ "kernel tracing forbidden for the unprivileged");
}

if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
- return -EINVAL;
+ return perf_err_attr(&attr, -EINVAL, "sample_freq too high");
} else {
if (attr.sample_period & (1ULL << 63))
- return -EINVAL;
+ return perf_err_attr(&attr, -EINVAL, "sample_period too high");
}

/*
--
2.1.4



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/