[PATCH 02/12] Add parse_integer() (replacement for simple_strto*())

From: Alexey Dobriyan
Date: Fri May 08 2015 - 14:30:50 EST


kstrto*() and kstrto*_from_user() family of functions were added
to help with parsing one integer written as string to proc/sysfs/debugfs
files. But they have a limitation: string passed must end with \0 or \n\0.
There are enough places where kstrto*() functions can't be used because of
this limitation. Trivial example: major:minor "%u:%u".

Currently the only way to parse everything is simple_strto*() functions.
But they are suboptimal:
* they do not detect overflow (can be fixed, but no one bothered since ~0.99.11),
* there are only 4 of them -- long and "long long" versions,
This leads to silent truncation in the most simple case:

val = strtoul(s, NULL, 0);

* half of the people think that "char **endp" argument is necessary and
add unnecessary variable.

OpenBSD people, fed up with how complex correct integer parsing is, added
strtonum(3) to fixup for deficiencies of libc-style integer parsing:
http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man3/strtonum.3?query=strtonum&arch=i386

It'd be OK to copy that but it relies on "errno" and fixed strings as
error reporting channel which I think is not OK for kernel.
strtonum() also doesn't report number of characted consumed.

What to do?

Enter parse_integer().

int parse_integer(const char *s, unsigned int base, T *val);

Rationale:
* parse_integer() is exactly 1 (one) interface not 4 or many, one for each type.

* parse_integer() reports -E errors reliably in a canonical kernel way:

rv = parse_integer(str, 10, &val);
if (rv < 0)
return rv;

* parse_integer() writes result only if there were no errors,
at least one digit has to be consumed,

* parse_integer doesn't mix error reporting channel and value channel,
it does mix error and number-of-character-consumed channel, though.

* parse_integer() reports number of characters consumed, makes parsing
multiple values easy:

rv = parse_integer(str, 0, &val1);
if (rv < 0)
return rv;
str += rv;
if (*str++ != ':')
return -EINVAL;
rv = parse_integer(str, 0, &val2);
if (rv < 0)
return rv;
if (str[rv] != '\0')
return -EINVAL;

There are several deficiencies in parse_integer() compared to strto*():

* can't be used in initializers:

const T x = strtoul();

* can't be used with bitfields,
* can't be used in expressions:

x = strtol() * 1024;
x = y = strtol();
x += strtol();

* currently there is no support for _Bool and at least one place
where simple_strtoul() is directly assigned to _Bool variable.
It is trivial to add, but not clear if it should only accept "0" and "1",
because, say, module param code accepts 0, 1, y, n, Y and N.

In defense of parse_integer() all I can say, is that using strtol() in
expression or initializer promotes no error checking and thus probably
should not be encouraged in C, language with no built-in error checking
anyway.

The amount of "x = y = strtol()" expressions in kernel is very small.
The amount of direct usage in expression is not small, but can be
counted as an acceptable loss.

Misc suggestions and ideas from Rasmus Villemoes.

v2:
* self-contained header
* trim code size a bit
* testsuite

Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx>
---
include/linux/kernel.h | 7 +-
include/linux/parse-integer.h | 79 +++++++++++++++++
lib/Makefile | 1 +
lib/kstrtox.c | 76 +++++-----------
lib/kstrtox.h | 1 -
lib/parse-integer.c | 198 ++++++++++++++++++++++++++++++++++++++++++
6 files changed, 305 insertions(+), 57 deletions(-)
create mode 100644 include/linux/parse-integer.h
create mode 100644 lib/parse-integer.c

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3a5b48e..9d3f543 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -10,6 +10,7 @@
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/typecheck.h>
+#include <linux/parse-integer.h>
#include <linux/printk.h>
#include <linux/dynamic_debug.h>
#include <asm/byteorder.h>
@@ -387,8 +388,10 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t
return kstrtoint_from_user(s, count, base, res);
}

-/* Obsolete, do not use. Use kstrto<foo> instead */
-
+/*
+ * Obsolete, do not use.
+ * Use parse_integer(), kstrto*(), kstrto*_from_user(), sscanf().
+ */
extern unsigned long simple_strtoul(const char *,char **,unsigned int);
extern long simple_strtol(const char *,char **,unsigned int);
extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
diff --git a/include/linux/parse-integer.h b/include/linux/parse-integer.h
new file mode 100644
index 0000000..5dd0347
--- /dev/null
+++ b/include/linux/parse-integer.h
@@ -0,0 +1,79 @@
+#ifndef _PARSE_INTEGER_H
+#define _PARSE_INTEGER_H
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/*
+ * int parse_integer(const char *s, unsigned int base, T *val);
+ *
+ * Convert integer string representation to an integer.
+ * Range of accepted values equals to that of type T.
+ *
+ * Conversion to unsigned integer accepts sign "+".
+ * Conversion to signed integer accepts sign "+" and sign "-".
+ *
+ * Radix 0 means autodetection: leading "0x" implies radix 16,
+ * leading "0" implies radix 8, otherwise radix is 10.
+ * Autodetection hint works after optional sign, but not before.
+ *
+ * Return number of characters parsed or -E.
+ *
+ * "T=char" case is not supported because -f{un,}signed-char can silently
+ * change range of accepted values.
+ */
+#define parse_integer(s, base, val) \
+({ \
+ const char *_s = (s); \
+ unsigned int _base = (base); \
+ typeof(&(val)[0]) _val = (val); \
+ \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), signed char *), \
+ _parse_integer_sc(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), unsigned char *), \
+ _parse_integer_uc(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), short *), \
+ _parse_integer_s(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), unsigned short *), \
+ _parse_integer_us(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), int *), \
+ _parse_integer_i(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), unsigned int *), \
+ _parse_integer_u(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), long *) && sizeof(long) == 4,\
+ _parse_integer_i(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), long *) && sizeof(long) == 8,\
+ _parse_integer_ll(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), unsigned long *) && sizeof(unsigned long) == 4,\
+ _parse_integer_u(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), unsigned long *) && sizeof(unsigned long) == 8,\
+ _parse_integer_ull(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), long long *), \
+ _parse_integer_ll(_s, _base, (void *)_val), \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(_val), unsigned long long *),\
+ _parse_integer_ull(_s, _base, (void *)_val), \
+ _parse_integer_link_time_error())))))))))))); \
+})
+/* internal, do not use */
+int _parse_integer_sc(const char *s, unsigned int base, signed char *val);
+int _parse_integer_uc(const char *s, unsigned int base, unsigned char *val);
+int _parse_integer_s(const char *s, unsigned int base, short *val);
+int _parse_integer_us(const char *s, unsigned int base, unsigned short *val);
+int _parse_integer_i(const char *s, unsigned int base, int *val);
+int _parse_integer_u(const char *s, unsigned int base, unsigned int *val);
+int _parse_integer_ll(const char *s, unsigned int base, long long *val);
+int _parse_integer_ull(const char *s, unsigned int base, unsigned long long *val);
+void _parse_integer_link_time_error(void);
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
+#endif
diff --git a/lib/Makefile b/lib/Makefile
index 6c37933..e69b356 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
obj-y += hexdump.o
obj-$(CONFIG_TEST_HEXDUMP) += test-hexdump.o
obj-y += kstrtox.o
+obj-y += parse-integer.o
obj-$(CONFIG_TEST_BPF) += test_bpf.o
obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
obj-$(CONFIG_TEST_KASAN) += test_kasan.o
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index 94be244..652fd6b 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -20,22 +20,6 @@
#include <asm/uaccess.h>
#include "kstrtox.h"

-const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
-{
- if (*base == 0) {
- if (s[0] == '0') {
- if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
- *base = 16;
- else
- *base = 8;
- } else
- *base = 10;
- }
- if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
- s += 2;
- return s;
-}
-
/*
* Convert non-negative integer string representation in explicitly given radix
* to an integer.
@@ -83,26 +67,6 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
return rv;
}

-static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
-{
- unsigned long long _res;
- unsigned int rv;
-
- s = _parse_integer_fixup_radix(s, &base);
- rv = _parse_integer(s, base, &_res);
- if (rv & KSTRTOX_OVERFLOW)
- return -ERANGE;
- if (rv == 0)
- return -EINVAL;
- s += rv;
- if (*s == '\n')
- s++;
- if (*s)
- return -EINVAL;
- *res = _res;
- return 0;
-}
-
/**
* kstrtoull - convert a string to an unsigned long long
* @s: The start of the string. The string must be null-terminated, and may also
@@ -121,9 +85,19 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
*/
int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
{
- if (s[0] == '+')
+ unsigned long long _res;
+ int rv;
+
+ rv = parse_integer(s, base, &_res);
+ if (rv < 0)
+ return rv;
+ s += rv;
+ if (*s == '\n')
s++;
- return _kstrtoull(s, base, res);
+ if (*s)
+ return -EINVAL;
+ *res = _res;
+ return 0;
}
EXPORT_SYMBOL(kstrtoull);

@@ -145,24 +119,18 @@ EXPORT_SYMBOL(kstrtoull);
*/
int kstrtoll(const char *s, unsigned int base, long long *res)
{
- unsigned long long tmp;
+ long long _res;
int rv;

- if (s[0] == '-') {
- rv = _kstrtoull(s + 1, base, &tmp);
- if (rv < 0)
- return rv;
- if ((long long)-tmp > 0)
- return -ERANGE;
- *res = -tmp;
- } else {
- rv = kstrtoull(s, base, &tmp);
- if (rv < 0)
- return rv;
- if ((long long)tmp < 0)
- return -ERANGE;
- *res = tmp;
- }
+ rv = parse_integer(s, base, &_res);
+ if (rv < 0)
+ return rv;
+ s += rv;
+ if (*s == '\n')
+ s++;
+ if (*s)
+ return -EINVAL;
+ *res = _res;
return 0;
}
EXPORT_SYMBOL(kstrtoll);
diff --git a/lib/kstrtox.h b/lib/kstrtox.h
index f13eeea..7b1f447 100644
--- a/lib/kstrtox.h
+++ b/lib/kstrtox.h
@@ -2,7 +2,6 @@
#define _LIB_KSTRTOX_H

#define KSTRTOX_OVERFLOW (1U << 31)
-const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);

#endif
diff --git a/lib/parse-integer.c b/lib/parse-integer.c
new file mode 100644
index 0000000..eb24307
--- /dev/null
+++ b/lib/parse-integer.c
@@ -0,0 +1,198 @@
+/*
+ * See parse_integer().
+ *
+ * Individual dispatch functions in this file aren't supposed to be used
+ * directly and thus aren't advertised and documented despited being exported.
+ *
+ * Do not use any function in this file for any reason.
+ */
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/parse-integer.h>
+#include <asm/bug.h>
+
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
+{
+ if (*base == 0) {
+ if (s[0] == '0') {
+ if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
+ *base = 16;
+ else
+ *base = 8;
+ } else
+ *base = 10;
+ }
+ if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
+ s += 2;
+ BUG_ON(*base < 2 || *base > 16);
+ return s;
+}
+
+static int __parse_integer(const char *s, unsigned int base, unsigned long long *val)
+{
+ const char *s0 = s, *sd;
+ unsigned long long acc;
+
+ s = sd = _parse_integer_fixup_radix(s0, &base);
+ acc = 0;
+ while (*s) {
+ unsigned int d;
+
+ if ('0' <= *s && *s <= '9')
+ d = *s - '0';
+ else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f')
+ d = _tolower(*s) - 'a' + 10;
+ else
+ break;
+ if (d >= base)
+ break;
+ /* Overflow can't happen early enough. */
+ if ((acc >> 60) && acc > div_u64(ULLONG_MAX - d, base))
+ return -ERANGE;
+ acc = acc * base + d;
+ s++;
+ }
+ /* At least one digit has to be converted. */
+ if (s == sd)
+ return -EINVAL;
+ *val = acc;
+ /* Radix 1 is not supported otherwise returned length can overflow. */
+ return s - s0;
+}
+
+int _parse_integer_ull(const char *s, unsigned int base, unsigned long long *val)
+{
+ char sign;
+ int rv;
+
+ sign = 0;
+ if (*s == '-')
+ return -EINVAL;
+ else if (*s == '+')
+ sign = *s++;
+
+ rv = __parse_integer(s, base, val);
+ if (rv < 0)
+ return rv;
+ return rv + !!sign;
+}
+EXPORT_SYMBOL(_parse_integer_ull);
+
+int _parse_integer_ll(const char *s, unsigned int base, long long *val)
+{
+ unsigned long long tmp;
+ char sign;
+ int rv;
+
+ sign = 0;
+ if (*s == '-' || *s == '+')
+ sign = *s++;
+
+ rv = __parse_integer(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (sign == '-') {
+ if ((long long)-tmp > 0)
+ return -ERANGE;
+ *val = -tmp;
+ } else {
+ if ((long long)tmp < 0)
+ return -ERANGE;
+ *val = tmp;
+ }
+ return rv + !!sign;
+}
+EXPORT_SYMBOL(_parse_integer_ll);
+
+int _parse_integer_u(const char *s, unsigned int base, unsigned int *val)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = _parse_integer_ull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned int)tmp)
+ return -ERANGE;
+ *val = tmp;
+ return rv;
+}
+EXPORT_SYMBOL(_parse_integer_u);
+
+int _parse_integer_i(const char *s, unsigned int base, int *val)
+{
+ long long tmp;
+ int rv;
+
+ rv = _parse_integer_ll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (int)tmp)
+ return -ERANGE;
+ *val = tmp;
+ return rv;
+}
+EXPORT_SYMBOL(_parse_integer_i);
+
+int _parse_integer_us(const char *s, unsigned int base, unsigned short *val)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = _parse_integer_ull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned short)tmp)
+ return -ERANGE;
+ *val = tmp;
+ return rv;
+}
+EXPORT_SYMBOL(_parse_integer_us);
+
+int _parse_integer_s(const char *s, unsigned int base, short *val)
+{
+ long long tmp;
+ int rv;
+
+ rv = _parse_integer_ll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (short)tmp)
+ return -ERANGE;
+ *val = tmp;
+ return rv;
+}
+EXPORT_SYMBOL(_parse_integer_s);
+
+int _parse_integer_uc(const char *s, unsigned int base, unsigned char *val)
+{
+ unsigned long long tmp;
+ int rv;
+
+ rv = _parse_integer_ull(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (unsigned char)tmp)
+ return -ERANGE;
+ *val = tmp;
+ return rv;
+}
+EXPORT_SYMBOL(_parse_integer_uc);
+
+int _parse_integer_sc(const char *s, unsigned int base, signed char *val)
+{
+ long long tmp;
+ int rv;
+
+ rv = _parse_integer_ll(s, base, &tmp);
+ if (rv < 0)
+ return rv;
+ if (tmp != (signed char)tmp)
+ return -ERANGE;
+ *val = tmp;
+ return rv;
+}
+EXPORT_SYMBOL(_parse_integer_sc);
--
2.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/