[PATCH RFC v3 1/6] exterr: Introduce extended syscall error reporting

From: Alexander Shishkin
Date: Fri Sep 11 2015 - 12:00:42 EST


It has been pointed out several times that certain system calls' error
reporting leaves a lot to be desired [1], [2]. Such system calls would
take complex parameter structures as their input and return -EINVAL if
one or more parameters are invalid or in conflict leaving it up to the
user to figure out exactly what is wrong with their request. One such
syscall is perf_event_open() with its attribute structure containing
40+ parameters and tens of parameter validation checks.

This patch introduces a fairly simple infrastructure that allows call
sites to annotate their error codes with arbitrary strings, which the
userspace can fetch using a prctl() along with the module name that
produced the error message, file name, line number and optionally any
amount of additional information in JSON format. This way, we can
provide both human-readable and machine-parsable information to user and
leave room for domain-specific extensions, such as the field in the
parameter structure that caused the error.

Each error "site" is referred to by its index, which is folded into an
integer error value within the range of [-EXT_ERRNO, -MAX_ERRNO], where
EXT_ERRNO is chosen to be below any known error codes, but still leaving
enough room to enumerate error sites. This way, all the traditional macros
will still handle these as error codes and we'd only have to convert them
to their original values right before returning to userspace. At that
point we'd also store a pointer to the error descriptor in the task_struct,
so that a subsequent prctl() call can retrieve it.

[1] http://marc.info/?l=linux-kernel&m=141470811013082
[2] http://marc.info/?l=linux-kernel&m=144049385530680

Signed-off-by: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
---
include/linux/exterr.h | 99 ++++++++++++++++++++++++++++
include/linux/sched.h | 1 +
include/uapi/linux/prctl.h | 5 ++
kernel/sys.c | 6 ++
lib/Makefile | 2 +
lib/exterr.c | 157 +++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 270 insertions(+)
create mode 100644 include/linux/exterr.h
create mode 100644 lib/exterr.c

diff --git a/include/linux/exterr.h b/include/linux/exterr.h
new file mode 100644
index 0000000000..1f412fe9ac
--- /dev/null
+++ b/include/linux/exterr.h
@@ -0,0 +1,99 @@
+/*
+ * Extended syscall error reporting
+ */
+#ifndef _LINUX_EXTERR_H
+#define _LINUX_EXTERR_H
+
+#include <linux/compiler.h>
+
+/*
+ * Extended error reporting: annotate an error code with a string
+ * and a module name to help users diagnase problems with their
+ * attributes and other syscall parameters.
+ */
+
+/*
+ * This is the basic error descriptor structure that is statically
+ * allocated for every annotated error (error site).
+ *
+ * Subsystems that wish to extend this structure should embed it
+ * and provide a callback for formatting the additional fields.
+ */
+struct ext_err_site {
+ const char *message;
+ const char *owner;
+ const char *file;
+ const int line;
+ const int code;
+};
+
+/*
+ * Error domain descriptor (compile/link time)
+ */
+struct ext_err_domain_desc {
+ const char *name;
+ const size_t err_site_size;
+ const void *start, *end;
+ char *(*format)(void *site);
+ int first;
+ int last;
+};
+
+extern struct ext_err_domain_desc __attribute__((weak)) __start___ext_err_domain_desc[];
+extern struct ext_err_domain_desc __attribute__((weak)) __stop___ext_err_domain_desc[];
+
+#define DECLARE_EXTERR_DOMAIN(__name, __format) \
+ extern const struct __name ## _ext_err_site __attribute__((weak)) __start_ ## __name ## _ext_err[]; \
+ extern const struct __name ## _ext_err_site __attribute__((weak)) __stop_ ## __name ## _ext_err[]; \
+ const struct ext_err_domain_desc __used \
+ __attribute__ ((__section__("__ext_err_domain_desc"))) \
+ __name ## _ext_err_domain_desc = { \
+ .name = __stringify(__name), \
+ .err_site_size = sizeof(struct __name ## _ext_err_site), \
+ .start = __start_ ## __name ## _ext_err, \
+ .end = __stop_ ## __name ## _ext_err, \
+ .format = __format, \
+ }; \
+
+#define __ext_err(__domain, __c, __m, __domain__fields ...) ({ \
+ static struct __domain ## _ext_err_site \
+ __attribute__ ((__section__(__stringify(__domain) "_ext_err"))) \
+ __err_site = { \
+ .site = { \
+ .message = (__m), \
+ .owner = EXTERR_MODNAME, \
+ .file = __FILE__, \
+ .line = __LINE__, \
+ .code = __builtin_constant_p((__c)) ? \
+ (__c) : -EINVAL, \
+ }, \
+ ## __domain__fields, \
+ }; \
+ &__err_site; \
+})
+
+#define ext_err(__domain, __c, __m, __domain__fields ...) \
+ ({ \
+ extern const struct __domain ## _ext_err_site __start_ ## __domain ## _ext_err[]; \
+ extern struct ext_err_domain_desc __domain ## _ext_err_domain_desc; \
+ -(__domain ## _ext_err_domain_desc.first + \
+ (__ext_err(__domain, __c, __m, __domain__fields) - \
+ __start_ ## __domain ## _ext_err)); \
+ })
+
+int ext_err_copy_to_user(void __user *buf, size_t size);
+
+int ext_err_errno(int code);
+
+/*
+ * Use part of the [-1, -MAX_ERRNO] errno range for the extended error
+ * reporting. Anything within [-EXT_ERRNO, -MAX_ERRNO] is an index of a
+ * ext_err_site structure within __ext_err section. 3k should be enough
+ * for everybody, but let's add a boot-time warning just in case it overflows
+ * one day.
+ */
+#define EXT_ERRNO 1024
+
+#define EXT_ERR_PTR(__e, __m) (ERR_PTR(ext_err(__e, __m)))
+
+#endif /* _LINUX_EXTERR_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 119823decc..0eeeda62ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1776,6 +1776,7 @@ struct task_struct {
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
#endif
+ int ext_err_code;
int pagefault_disabled;
/* CPU-specific state of this task */
struct thread_struct thread;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 31891d9535..c531b8058e 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -190,4 +190,9 @@ struct prctl_mm_map {
# define PR_FP_MODE_FR (1 << 0) /* 64b FP registers */
# define PR_FP_MODE_FRE (1 << 1) /* 32b compatibility */

+/*
+ * Retrieve the extended error report for the last error
+ */
+#define PR_GET_ERR_DESC 47
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 259fda25eb..70f1e435c1 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -54,6 +54,7 @@
#include <linux/cred.h>

#include <linux/kmsg_dump.h>
+#include <linux/exterr.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>

@@ -2267,6 +2268,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_FP_MODE:
error = GET_FP_MODE(me);
break;
+ case PR_GET_ERR_DESC:
+ if (arg4 || arg5)
+ return -EINVAL;
+ error = ext_err_copy_to_user((void __user *)arg2, (size_t)arg3);
+ break;
default:
error = -EINVAL;
break;
diff --git a/lib/Makefile b/lib/Makefile
index 6897b52758..e95330d245 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -205,3 +205,5 @@ quiet_cmd_build_OID_registry = GEN $@
clean-files += oid_registry_data.c

obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
+
+obj-y += exterr.o
diff --git a/lib/exterr.c b/lib/exterr.c
new file mode 100644
index 0000000000..c56c88db58
--- /dev/null
+++ b/lib/exterr.c
@@ -0,0 +1,157 @@
+/*
+ * Extended error reporting
+ */
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/exterr.h>
+#include <linux/uaccess.h>
+
+static struct ext_err_domain_desc ** domains;
+static unsigned int nr_domains;
+
+int exterr_last = EXT_ERRNO;
+
+static struct ext_err_domain_desc *ext_err_domain_find(int code)
+{
+ unsigned int i = nr_domains / 2;
+
+ if (code < EXT_ERRNO || code > domains[nr_domains - 1]->last)
+ return NULL;
+
+ do {
+ if (code > domains[i]->last)
+ i -= i / 2;
+ else if (code < domains[i]->first)
+ i += i / 2;
+ else
+ return domains[i];
+ } while (i < nr_domains);
+
+ return NULL;
+}
+
+static struct ext_err_site *ext_err_site_find(int code)
+{
+ struct ext_err_domain_desc *dom = ext_err_domain_find(code);
+ struct ext_err_site *site;
+ unsigned long off;
+
+ if (!code)
+ return NULL;
+
+ /* shouldn't happen */
+ if (WARN_ON_ONCE(!dom))
+ return NULL;
+
+ code -= dom->first;
+ off = (unsigned long)dom->start + dom->err_site_size * code;
+ site = (struct ext_err_site *)off;
+
+ return site;
+}
+
+int ext_err_copy_to_user(void __user *buf, size_t size)
+{
+ struct ext_err_domain_desc *dom = ext_err_domain_find(current->ext_err_code);
+ struct ext_err_site *site = ext_err_site_find(current->ext_err_code);
+ char *output, *dom_fmt = NULL;
+ unsigned long len;
+ int ret;
+
+ if (!site)
+ return 0;
+
+ if (dom->format)
+ dom_fmt = dom->format(site);
+
+ output = kasprintf(GFP_KERNEL,
+ "{\n"
+ "\t\"file\": \"%s\",\n"
+ "\t\"line\": %d,\n"
+ "\t\"code\": %d,\n"
+ "\t\"module\": \"%s\",\n"
+ "\t\"message\": \"%s\"%s\n%s"
+ "}\n",
+ site->file, site->line,
+ site->code, site->owner, site->message,
+ dom_fmt ? "," : "",
+ dom_fmt ? dom_fmt : ""
+ );
+
+ kfree(dom_fmt);
+
+ if (!output)
+ return -ENOMEM;
+
+ /* trim the buffer to the supplied boundary */
+ len = strlen(output);
+ if (len >= size) {
+ len = size - 1;
+ output[len] = 0;
+ }
+
+ ret = copy_to_user(buf, output, len + 1);
+
+ kfree(output);
+
+ if (!ret)
+ current->ext_err_code = 0;
+
+ return ret ? ret : len + 1;
+}
+
+int ext_err_errno(int code)
+{
+ struct ext_err_site *site;
+
+ if (code > -EXT_ERRNO)
+ return code;
+
+ site = ext_err_site_find(-code);
+ if (!site)
+ return code;
+
+ current->ext_err_code = -code;
+
+ return site->code;
+}
+
+static int ext_err_init(void)
+{
+ struct ext_err_domain_desc *dom;
+ size_t size;
+ int i;
+
+ if (!__start___ext_err_domain_desc)
+ return 0;
+
+ nr_domains = __stop___ext_err_domain_desc - __start___ext_err_domain_desc;
+ domains = kcalloc(nr_domains, sizeof(void *), GFP_KERNEL);
+ if (!domains)
+ return -ENOMEM;
+
+ /* allocate error code to the domains */
+ printk("Extended error reporting domains:\n");
+ for (dom = __start___ext_err_domain_desc, i = 0;
+ dom < __stop___ext_err_domain_desc;
+ dom++, i++) {
+ size = dom->end - dom->start;
+ size /= dom->err_site_size;
+
+ if (WARN_ON_ONCE(exterr_last + size >= MAX_ERRNO))
+ break;
+
+ domains[i] = dom;
+ domains[i]->first = exterr_last;
+ domains[i]->last = exterr_last + size - 1;
+
+ printk(" \"%s\": %d..%d\n", dom->name,
+ -domains[i]->first, -domains[i]->last);
+
+ exterr_last += size;
+ }
+
+ return 0;
+}
+
+core_initcall(ext_err_init);
--
2.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/