Re: [PATCH 1/2] lib/strtox: introduce kstrtoull_suffix() helper

From: David Disseldorp
Date: Mon Dec 18 2023 - 08:12:03 EST


Hi Qu,

On Fri, 15 Dec 2023 19:09:23 +1030, Qu Wenruo wrote:

> Just as mentioned in the comment of memparse(), the simple_strtoull()
> usage can lead to overflow all by itself.
>
> Furthermore, the suffix calculation is also super overflow prone because
> some suffixes like "E" would by themselves eat 60 bits, leaving only 4 bits
> available.
>
> And that suffix "E" can also lead to confusion since it's using the same
> char of hex 0x'E'.
>
> One simple example to expose all the problems is to use memparse() on
> "25E".
> The correct value should be 28823037615171174400, but the suffix E makes
> it super simple to overflow, resulting in the incorrect value
> 10376293541461622784 (9E).
>
> So here we introduce a new helper to address the problem,
> kstrtoull_suffix():
>
> - Enhance _kstrtoull()
> This allows _kstrtoull() to return even if it hits an invalid char, as
> long as the optional parameter @retptr is provided.
>
> If @retptr is provided, _kstrtoull() would try its best to parse the
> valid part, and leave the remaining to be handled by the caller.
>
> If @retptr is not provided, the behavior is not altered.
>
> - New kstrtoull_suffix() helper
> This new helper utilizes the new @retptr capability of _kstrtoull(),
> and provides 2 new abilities:
>
> * Allow certain suffixes to be chosen
> The recommended suffix list is "KkMmGgTtPp", excluding the overflow
> prone "Ee". Under most cases there is really no need to use "E" suffix
> anyway.
> And for those who really need that exabytes suffix, they can enable
> that suffix pretty easily.
>
> * Add overflow checks for the suffixes
> If the original number string is fine, but with the extra left
> shift overflow happens, then -EOVERFLOW is returned.
>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Christophe JAILLET <christophe.jaillet@xxxxxxxxxx>
> Cc: Andy Shevchenko <andriy.shevchenko@xxxxxxxxxxxxxxx>
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
> ---
> include/linux/kstrtox.h | 7 +++
> lib/kstrtox.c | 113 ++++++++++++++++++++++++++++++++++++++--
> 2 files changed, 115 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h
> index 7fcf29a4e0de..12c754152c15 100644
> --- a/include/linux/kstrtox.h
> +++ b/include/linux/kstrtox.h
> @@ -9,6 +9,13 @@
> int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
> int __must_check _kstrtol(const char *s, unsigned int base, long *res);
>
> +/*
> + * The default suffix list would not include "E" since it's too easy to overflow
> + * and not much real world usage.
> + */
> +#define KSTRTOULL_SUFFIX_DEFAULT ("KkMmGgTtPp")
> +int kstrtoull_suffix(const char *s, unsigned int base, unsigned long long *res,
> + const char *suffixes);
> int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res);
> int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
>
> diff --git a/lib/kstrtox.c b/lib/kstrtox.c
> index d586e6af5e5a..63831207dfdd 100644
> --- a/lib/kstrtox.c
> +++ b/lib/kstrtox.c
> @@ -93,7 +93,8 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
> return _parse_integer_limit(s, base, p, INT_MAX);
> }
>
> -static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
> +static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res,
> + char **retptr)
> {
> unsigned long long _res;
> unsigned int rv;
> @@ -105,11 +106,19 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
> if (rv == 0)
> return -EINVAL;
> s += rv;
> - if (*s == '\n')
> +
> + /*
> + * If @retptr is provided, caller is responsible to detect
> + * the extra chars, otherwise we can skip one newline.
> + */
> + if (!retptr && *s == '\n')
> s++;
> - if (*s)
> + if (!retptr && *s)
> return -EINVAL;
> +
> *res = _res;
> + if (retptr)
> + *retptr = (char *)s;
> return 0;
> }
>
> @@ -133,10 +142,104 @@ int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
> {
> if (s[0] == '+')
> s++;
> - return _kstrtoull(s, base, res);
> + return _kstrtoull(s, base, res, NULL);
> }
> EXPORT_SYMBOL(kstrtoull);
>
> +/**
> + * kstrtoull_suffix - convert a string to ull with suffixes support
> + * @s: The start of the string. The string must be null-terminated, and may also
> + * include a single newline before its terminating null.
> + * @base: The number base to use. The maximum supported base is 16. If base is
> + * given as 0, then the base of the string is automatically detected with the
> + * conventional semantics - If it begins with 0x the number will be parsed as a
> + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
> + * parsed as an octal number. Otherwise it will be parsed as a decimal.
> + * @res: Where to write the result of the conversion on success.
> + * @suffixes: A string of acceptable suffixes, must be provided. Or caller
> + * should use kstrtoull() directly.

The suffixes parameter seems a bit cumbersome; callers need to provide
both upper and lower cases, and unsupported characters aren't checked
for. However, I can't think of any better suggestions at this stage.

> + *
> + *
> + * Return 0 on success.
> + *
> + * Return -ERANGE or -EOVERFLOW on overflow, or -EINVAL if invalid chars found.
> + * Return value must be checked.
> + */
> +int kstrtoull_suffix(const char *s, unsigned int base, unsigned long long *res,
> + const char *suffixes)
> +{
> + unsigned long long init_value;
> + unsigned long long final_value;
> + char *endptr;
> + int ret;
> +
> + ret = _kstrtoull(s, base, &init_value, &endptr);
> + /* Either already overflow or no number string at all. */
> + if (ret < 0)
> + return ret;
> + final_value = init_value;
> + /* No suffixes. */
> + if (!*endptr)
> + goto done;
> +
> + switch (*endptr) {
> + case 'K':
> + case 'k':
> + if (!strchr(suffixes, *endptr))
> + return -EINVAL;
> + final_value <<= 10;
> + endptr++;
> + break;
> + case 'M':
> + case 'm':
> + if (!strchr(suffixes, *endptr))
> + return -EINVAL;
> + final_value <<= 20;
> + endptr++;
> + break;
> + case 'G':
> + case 'g':
> + if (!strchr(suffixes, *endptr))
> + return -EINVAL;
> + final_value <<= 30;
> + endptr++;
> + break;
> + case 'T':
> + case 't':
> + if (!strchr(suffixes, *endptr))
> + return -EINVAL;
> + final_value <<= 40;
> + endptr++;
> + break;
> + case 'P':
> + case 'p':
> + if (!strchr(suffixes, *endptr))
> + return -EINVAL;
> + final_value <<= 50;
> + endptr++;
> + break;
> + case 'E':
> + case 'e':
> + if (!strchr(suffixes, *endptr))
> + return -EINVAL;
> + final_value <<= 60;
> + endptr++;
> + break;
> + }
> + if (*endptr == '\n')

Nit: the per-case logic could be simplified to a single "shift_val = X"
if you initialise and handle !shift_val.

> + endptr++;
> + if (*endptr)
> + return -EINVAL;
> +
> + /* Overflow check. */
> + if (final_value < init_value)
> + return -EOVERFLOW;
> +done:
> + *res = final_value;
> + return 0;
> +}
> +EXPORT_SYMBOL(kstrtoull_suffix);
> +
> /**
> * kstrtoll - convert a string to a long long
> * @s: The start of the string. The string must be null-terminated, and may also
> @@ -159,7 +262,7 @@ int kstrtoll(const char *s, unsigned int base, long long *res)
> int rv;
>
> if (s[0] == '-') {
> - rv = _kstrtoull(s + 1, base, &tmp);
> + rv = _kstrtoull(s + 1, base, &tmp, NULL);
> if (rv < 0)
> return rv;
> if ((long long)-tmp > 0)