[RFC 4/6] lib/vsprintf.c: add fmtcheck utility

From: Rasmus Villemoes
Date: Wed Nov 08 2017 - 17:30:57 EST


We have a few places in the kernel where a *printf function is used with
a non-constant format string, making the ordinary static type checking
done by gcc et al. impossible. Some things can still be caught at build
time with appropriate instrumentation (I'm sure one can do much better
than the format_template plugin), but that still leaves a number of
places unchecked. So this patch adds a function for doing run-time
verification of a given format string against a template.

The fmtcheck() function takes two format string arguments and checks
whether they contain the same printf specifiers. If they do, the
first (the string-to-be-checked) string is returned. If not, the
second (the template) is returned. Regardless of which string is
returned at run-time, the __format_arg attribute allows the compiler to
do type-checking if the fmtcheck() function is used inside a *printf
call, e.g.

sprintf(buf, fmtcheck(what->ever, "%d %lx", 0), i, m)

We actually make fmtcheck() a macro that tries very hard to ensure the
template argument is a string literal - partly to help avoid mixing up
the two "const char*" arguments, partly because much of the point of
this sanity checking vanishes if the template is not a literal (e.g.,
the __format_arg annotation becomes useless).

We don't treat "%*.*s" and "%d %d %s" as equivalent, despite them
taking the same vararg types, since they're morally very distinct. In
fact, at least for now, we don't even treat "%d" and "%u" as
equivalent. We can relax that, possibly via FMTCHECK_* flags, but let's
first see which users there might be and what they'd want.

If either string contains a %p, we really should check the following
alphanumerics to see which (if any) extension is used and check that
they match as well. For now, just complain loudly, partly because I'm
lazy, partly because I don't know any in-tree code that might use
fmtcheck() with a %p in the template, and I can't really imagine
anyone would use a %pXX extension in a non-constant format string.

I don't know if WARN is too violent; maybe just pr_warn would be ok.

The BSDs (and libbsd on Linux) contain a fmtcheck() function; I took the
name and return semantics from that.

Signed-off-by: Rasmus Villemoes <linux@xxxxxxxxxxxxxxxxxx>
---
include/linux/kernel.h | 6 +++++
lib/vsprintf.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 69 insertions(+)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4b484ab9e163..d7c6f9a9c024 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -460,6 +460,12 @@ char *kvasprintf(gfp_t gfp, const char *fmt, va_list args);
extern __printf(2, 0)
const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args);

+extern __format_arg(2) __pure /* reads *fmt and *tmpl, so must not be const */
+const char *_fmtcheck(const char *fmt, const char *tmpl, unsigned flags);
+#define fmtcheck(fmt, tmpl, flags) _fmtcheck(fmt, "" tmpl "", flags)
+#define FMTCHECK_SILENT 0x01 /* never WARN */
+#define FMTCHECK_NO_EXTRA_ARGS 0x02 /* warn if tmpl has specifiers fmt lacks */
+
extern __scanf(2, 3)
int sscanf(const char *, const char *, ...);
extern __scanf(2, 0)
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 86c3385b9eb3..db50acf682e7 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -3030,3 +3030,66 @@ int sscanf(const char *buf, const char *fmt, ...)
return i;
}
EXPORT_SYMBOL(sscanf);
+
+/*
+ * Step *s over FORMAT_TYPE_NONE and FORMAT_TYPE_PERCENT_CHAR pieces to just
+ * past the next other spec; return its length, or 0 when nothing is left.
+ */
+static int next_interesting_spec(const char **s, struct printf_spec *spec)
+{
+	int len;
+
+	while ((len = format_decode(*s, spec)) != 0) {
+		*s += len;
+		if (spec->type != FORMAT_TYPE_NONE &&
+		    spec->type != FORMAT_TYPE_PERCENT_CHAR)
+			return len;
+	}
+	return 0;
+}
+
+/**
+ * _fmtcheck - check a run-time format string against a template
+ * @fmt: format string to be checked
+ * @tmpl: template whose specifiers @fmt must match in type and base
+ * @flags: bitwise OR of FMTCHECK_* flags
+ * Return: @fmt if it is compatible with @tmpl, otherwise @tmpl.
+ */
+const char *
+_fmtcheck(const char *fmt, const char *tmpl, unsigned flags)
+{
+	const char *f = fmt;
+	const char *t = tmpl;
+	struct printf_spec fspec = {0}, tspec = {0};
+	int flen, tlen;
+	int warn = !(flags & FMTCHECK_SILENT);
+
+	while (1) {
+		flen = next_interesting_spec(&f, &fspec);
+		tlen = next_interesting_spec(&t, &tspec);
+		if (!flen) {
+			/*
+			 * fmt ran out of specifiers first. That is type-safe,
+			 * so fmt is returned; extra template args only warn.
+			 */
+			WARN_ONCE(warn && tlen && (flags & FMTCHECK_NO_EXTRA_ARGS),
+				  "template '%s' expects more arguments than '%s'\n",
+				  tmpl, fmt);
+			return fmt;
+		}
+		if (!tlen) {
+			WARN_ONCE(warn, "format string '%s' expects more arguments than template '%s'\n",
+				  fmt, tmpl);
+			return tmpl;
+		}
+		WARN_ONCE(warn && (fspec.type == FORMAT_TYPE_PTR || tspec.type == FORMAT_TYPE_PTR),
+			  "don't use %%p in non-constant format strings\n");
+		/* Flags, field width and precision are (for now) not compared. */
+		if (fspec.type != tspec.type || fspec.base != tspec.base) {
+			WARN_ONCE(warn, "format string '%s' incompatible with template '%s'\n",
+				  fmt, tmpl);
+			return tmpl;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(_fmtcheck);
--
2.11.0