[PATCH v2 1/6] perf: Introduce extended syscall error reporting

From: Alexander Shishkin
Date: Mon Aug 24 2015 - 10:33:26 EST


It has been pointed several times out that perf syscall error reporting
leaves a lot to be desired [1].

This patch introduces a fairly simple extension that allows call sites
to annotate their error codes with arbitrary strings, which will then
be copied to userspace (if they asked for it) along with the module
name that produced the error message in JSON format. This way, we can
provide both human-readable and machine-parsable information to user and
leave room for extensions in the future, such as file name and line
number if kernel debugging is enabled.

Each error "site" is referred to by its index, which is folded into an
integer error value within the range of [-PERF_ERRNO, -MAX_ERRNO], where
PERF_ERRNO is chosen to be below any known error codes, but still leaving
enough room to enumerate error sites. This way, all the traditional macros
will still handle these as error codes and we'd only have to convert them
to their original values right before returning to userspace. This way we
also don't have to worry about keeping a separate pointer to the error
message inside a perf_event.

[1] http://marc.info/?l=linux-kernel&m=141470811013082

Signed-off-by: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
---
include/linux/perf_event.h | 71 ++++++++++++++++++++++++++++++++++
include/uapi/linux/perf_event.h | 8 +++-
kernel/events/core.c | 86 ++++++++++++++++++++++++++++++++++++++---
3 files changed, 159 insertions(+), 6 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2027809433..8e37939d21 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -56,6 +56,77 @@ struct perf_guest_info_callbacks {
#include <linux/cgroup.h>
#include <asm/local.h>

+#ifndef PERF_MODNAME
+#define PERF_MODNAME "perf"
+#endif
+
+/*
+ * Extended error reporting: annotate an error code with a string
+ * and a module name to help users diagnase problems with their
+ * attributes and whatnot.
+ */
+struct perf_err_site {
+ const char *message;
+ const char *owner;
+ const int code;
+};
+
+#ifdef CONFIG_PERF_EVENTS
+
+/*
+ * Place all occurrences of struct perf_err_site into a special section,
+ * so that we can find out their offsets, which we'll use to refer back
+ * to the error sites.
+ */
+extern const struct perf_err_site __start___perf_err[], __stop___perf_err[];
+
+#define __perf_err(__c, __m) ({ \
+ static struct perf_err_site \
+ __attribute__ ((__section__("__perf_err"))) \
+ __err_site = { \
+ .message = (__m), \
+ .owner = PERF_MODNAME, \
+ .code = __builtin_constant_p((__c)) ? \
+ (__c) : -EINVAL, \
+ }; \
+ &__err_site; \
+})
+
+/*
+ * Use part of the [-1, -MAX_ERRNO] errno range for perf's extended error
+ * reporting. Anything within [-PERF_ERRNO, -MAX_ERRNO] is an index of a
+ * perf_err_site structure within __perf_err section. 3k should be enough
+ * for everybody, but let's add a boot-time warning just in case it overflows
+ * one day.
+ */
+#define PERF_ERRNO 1024
+
+static inline int perf_errno(const struct perf_err_site *site)
+{
+ unsigned long err = site - __start___perf_err;
+
+ return -(int)err - PERF_ERRNO;
+}
+
+static inline const struct perf_err_site *perf_errno_to_site(int err)
+{
+ return __start___perf_err - err - PERF_ERRNO;
+}
+
+#ifdef MODULE
+/*
+ * Module support is a tad trickier, but far from rocket surgery. Let's
+ * bypass it for now.
+ */
+#define perf_err(__c, __m) (__c)
+#else
+#define perf_err(__c, __m) (perf_errno(__perf_err((__c), (__m))))
+#endif
+
+#define PERF_ERR_PTR(__e, __m) (ERR_PTR(perf_err(__e, __m)))
+
+#endif
+
struct perf_callchain_entry {
__u64 nr;
__u64 ip[PERF_MAX_STACK_DEPTH];
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2881145cda..73b6919954 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -264,6 +264,7 @@ enum perf_event_read_format {
/* add: sample_stack_user */
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6 120 /* add: perf_err */

/*
* Hardware event_id to monitor via a performance monitoring event:
@@ -375,7 +376,12 @@ struct perf_event_attr {
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
- __u32 __reserved_2; /* align to __u64 */
+
+ /*
+ * Extended error reporting buffer
+ */
+ __u32 perf_err_size;
+ __u64 perf_err;
};

#define perf_flags(attr) (*(&(attr)->read_format + 1))
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ae16867670..c6f43d1d5f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -49,6 +49,79 @@

#include <asm/irq_regs.h>

+static bool extended_reporting_enabled(struct perf_event_attr *attr)
+{
+ if (attr->size >= PERF_ATTR_SIZE_VER6 &&
+ attr->perf_err_size > 0)
+ return true;
+
+ return false;
+}
+
+/*
+ * Provide a JSON formatted error report to the user if they asked for it.
+ */
+static void perf_error_report_site(struct perf_event_attr *attr,
+ const struct perf_err_site *site)
+{
+ unsigned long len;
+ char *buffer;
+
+ if (!site || !extended_reporting_enabled(attr))
+ return;
+
+ /* in case of nested perf_err()s, which you shouldn't really do */
+ while (site->code <= -PERF_ERRNO)
+ site = perf_errno_to_site(site->code);
+
+ buffer = kasprintf(GFP_KERNEL,
+ "{\n"
+ "\t\"code\": %d,\n"
+ "\t\"module\": \"%s\",\n"
+ "\t\"message\": \"%s\"\n"
+ "}\n",
+ site->code, site->owner, site->message
+ );
+ if (!buffer)
+ return;
+
+ /* trim the buffer to the supplied boundary */
+ len = strlen(buffer);
+ if (len >= attr->perf_err_size) {
+ len = attr->perf_err_size - 1;
+ buffer[len] = 0;
+ }
+
+ if (copy_to_user((void __user *)attr->perf_err, buffer, len + 1)) {
+ /* if we failed to copy once, don't bother later */
+ attr->perf_err_size = 0;
+ }
+
+ kfree(buffer);
+}
+
+/*
+ * Synchronous version of perf_err(), for the paths where we return immediately
+ * back to userspace.
+ */
+#define perf_err_sync(__attr, __c, __m) ({ \
+ perf_error_report_site(__attr, __perf_err((__c), (__m))); \
+ (__c); \
+})
+
+static int perf_error_report(struct perf_event_attr *attr, int err)
+{
+ const struct perf_err_site *site;
+
+ if (err > -PERF_ERRNO)
+ return err;
+
+ site = perf_errno_to_site(err);
+ perf_error_report_site(attr, site);
+
+ return site->code;
+}
+
static struct workqueue_struct *perf_wq;

typedef int (*remote_function_f)(void *);
@@ -3904,7 +3977,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
ret = perf_read_hw(event, buf, count);
perf_event_ctx_unlock(event, ctx);

- return ret;
+ return perf_error_report(&event->attr, ret);
}

static unsigned int perf_poll(struct file *file, poll_table *wait)
@@ -4152,7 +4225,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
ret = _perf_ioctl(event, cmd, arg);
perf_event_ctx_unlock(event, ctx);

- return ret;
+ return perf_error_report(&event->attr, ret);
}

#ifdef CONFIG_COMPAT
@@ -4752,7 +4825,7 @@ aux_unlock:
if (event->pmu->event_mapped)
event->pmu->event_mapped(event);

- return ret;
+ return perf_error_report(&event->attr, ret);
}

static int perf_fasync(int fd, struct file *filp, int on)
@@ -4766,7 +4839,7 @@ static int perf_fasync(int fd, struct file *filp, int on)
mutex_unlock(&inode->i_mutex);

if (retval < 0)
- return retval;
+ return perf_error_report(&event->attr, retval);

return 0;
}
@@ -8352,7 +8425,7 @@ err_group_fd:
fdput(group);
err_fd:
put_unused_fd(event_fd);
- return err;
+ return perf_error_report(&attr, err);
}

/**
@@ -9130,6 +9203,9 @@ void __init perf_event_init(void)
*/
BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head))
!= 1024);
+
+ /* Too many error sites; see the comment at PERF_ERRNO definition */
+ WARN_ON(__stop___perf_err - __start___perf_err > MAX_ERRNO - PERF_ERRNO);
}

ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
--
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/