Re: [PATCH 3/6] bitmap_parselist: rework input string parser

From: Andy Shevchenko
Date: Tue Mar 26 2019 - 06:10:52 EST


On Tue, Mar 26, 2019 at 12:07:45AM +0300, Yury Norov wrote:
> The requirement for this rework is to keep the __bitmap_parselist()
> copy-less and single-pass but make it more readable and maintainable by
> splitting into logical parts and removing explicit nested cycles and
> opaque local variables.
>
> __bitmap_parselist() can parse userspace inputs and therefore we cannot
> use simple_strtoul() to parse numbers.

> +static long get_char(char *c, const char *str, int is_user)
> +{
> + if (unlikely(is_user))

Can is_user be boolean?

Can we name it from_user or is_from_user?

> + return __get_user(*c, (const char __force __user *)str);
> +
> + *c = *str;
> + return 0;
> +}
> +
> +static const char *bitmap_getnum(unsigned int *num, const char *str,
> + const char *const end, int is_user)
> +{
> + unsigned int n = 0;
> + const char *_str;
> + long ret;
> + char c;
> +

> + for (_str = str, *num = 0; _str <= end; _str++) {
> + ret = get_char(&c, _str, is_user);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + if (!isdigit(c)) {
> + if (_str == str)
> + return ERR_PTR(-EINVAL);
> +
> + return _str;
> + }
> +
> + *num = *num * 10 + (c - '0');
> + if (*num < n)
> + return ERR_PTR(-EOVERFLOW);
> +
> + n = *num;
> + }

Can't we do other way around, i.e. move the loop body to a helper and do
something like this:

if (is_from_user) {
for (...) {
__get_user(...);
helper1();
helper2();
}
} else {
for (...) {
*c = *str;
helper1();
helper2()
}
}

Each branch can be optimized more I think.

> +
> + return _str;
> +}
> +
> +static const char *bitmap_find_region(const char *str,
> + const char *const end, int is_user)
> +{
> + long ret;
> + char c;
> +
> + for (; str <= end; str++) {
> + ret = get_char(&c, str, is_user);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + /* Unexpected end of line. */
> + if (c == 0 || c == '\n')
> + return NULL;
> +
> + /*
> + * The format allows commas and whitespases
> + * at the beginning of the region, so skip it.
> + */
> + if (!isspace(c) && c != ',')
> + break;
> + }
> +
> + return str <= end ? str : NULL;
> +}
> +
> +static const char *bitmap_parse_region(struct region *r, const char *str,
> + const char *const end, int is_user)
> +{
> + long ret;
> + char c;
> +
> + str = bitmap_getnum(&r->start, str, end, is_user);
> + if (IS_ERR(str))
> + return str;
> +
> + ret = get_char(&c, str, is_user);
> + if (ret)
> + return (char *)ret;
> +
> + if (c == 0 || c == '\n') {
> + str = end + 1;
> + goto no_end;
> + }
> +
> + if (isspace(c) || c == ',')
> + goto no_end;
> +
> + if (c != '-')
> + return ERR_PTR(-EINVAL);
> +
> + str = bitmap_getnum(&r->end, str + 1, end, is_user);
> + if (IS_ERR(str))
> + return str;
> +
> + ret = get_char(&c, str, is_user);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + if (c == 0 || c == '\n') {
> + str = end + 1;
> + goto no_pattern;
> + }
> +
> + if (isspace(c) || c == ',')
> + goto no_pattern;
> +
> + if (c != ':')
> + return ERR_PTR(-EINVAL);
> +
> + str = bitmap_getnum(&r->off, str + 1, end, is_user);
> + if (IS_ERR(str))
> + return str;
> +
> + ret = get_char(&c, str, is_user);
> + if (ret)
> + return (char *)ret;
> +
> + if (c != '/')
> + return ERR_PTR(-EINVAL);
> +

> + str = bitmap_getnum(&r->grlen, str + 1, end, is_user);
> +
> + return str;

return bitmap_getnum(...);

> +
> +no_end:
> + r->end = r->start;
> +no_pattern:
> + r->off = r->end + 1;
> + r->grlen = r->end + 1;
> +
> + return str;
> +}
> +

So, all above depends to what memory we access kernel / user space.
Perhaps we can get copy of memory of a given size and then parse it in kernel space always?

--
With Best Regards,
Andy Shevchenko