Re: [PATCH 3/4] bitmap_parselist: rework input string parser

From: Yuri Norov
Date: Wed Jan 09 2019 - 12:16:42 EST


On Wed, Jan 09, 2019 at 06:01:46PM +0200, Andy Shevchenko wrote:
> On Sun, Dec 23, 2018 at 09:44:55AM +0000, Yuri Norov wrote:
> > The requirement for this rework is to keep the __bitmap_parselist()
> > copy-less and single-pass but make it more readable and maintainable by
> > splitting into logical parts and removing explicit nested cycles and
> > opaque local variables.
> >
> > __bitmap_parselist() can parse userspace inputs and therefore we cannot
> > use simple_strtoul() to parse numbers.
>
> I see two issues with this patch:
> - you are using IS_ERR() but ain't utilizing PTR_ERR(), ERR_PTR() ones

OK. Will use them in v2.

> - you remove lines here which you added in the same series.
>
> Second one shows ordering issue of logical changes.

Do you mean this chunk?

> > - r.start = a;
> > - r.off = used_size;
> > - r.grlen = group_size;
> > - r.end = b;

It's because I split refactoring into 2 parts for the sake of
readability. Patch #2 may be applied independently if #3 will
be considered inappropriate, that's why I ordered it prior to
#3, and it caused the need for removing that lines. I don't
think it's too ugly though, and I'd prefer to keep 2 and 3
separated with the cost of this little mess.

Reversing the order looks tricky at the first glance, but I'm
OK to join #2 and #3 if it will be considered worthy. Would
work both ways.

> > Signed-off-by: Yury Norov <ynorov@xxxxxxxxxxx>
> > ---
> > lib/bitmap.c | 247 ++++++++++++++++++++++++++++++---------------------
> > 1 file changed, 148 insertions(+), 99 deletions(-)
> >
> > diff --git a/lib/bitmap.c b/lib/bitmap.c
> > index a60fd9723677..edc7068c28a6 100644
> > --- a/lib/bitmap.c
> > +++ b/lib/bitmap.c
> > @@ -513,6 +513,140 @@ static int bitmap_check_region(const struct region *r)
> > return 0;
> > }
> >
> > +static long get_char(char *c, const char *str, int is_user)
> > +{
> > + if (unlikely(is_user))
> > + return __get_user(*c, str);
> > +
> > + *c = *str;
> > + return 0;
> > +}
> > +
> > +static const char *bitmap_getnum(unsigned int *num, const char *str,
> > + const char *const end, int is_user)
> > +{
> > + unsigned int n = 0;
> > + const char *_str;
> > + long ret;
> > + char c;
> > +
> > + for (_str = str, *num = 0; _str <= end; _str++) {
> > + ret = get_char(&c, _str, is_user);
> > + if (ret)
> > + return (char *)ret;
> > +
> > + if (!isdigit(c)) {
> > + if (_str == str)
> > + return (char *)-EINVAL;
> > +
> > + goto out;
> > + }
> > +
> > + *num = *num * 10 + (c - '0');
> > + if (*num < n)
> > + return (char *)-EOVERFLOW;
> > +
> > + n = *num;
> > + }
> > +
> > +out:
> > + return _str;
> > +}
> > +
> > +static const char *bitmap_find_region(const char *str,
> > + const char *const end, int is_user)
> > +{
> > + long ret;
> > + char c;
> > +
> > + for (; str < end; str++) {
> > + ret = get_char(&c, str, is_user);
> > + if (ret)
> > + return (char *)ret;
> > +
> > + /* Unexpected end of line. */
> > + if (c == 0 || c == '\n')
> > + return NULL;
> > +
> > + /*
> > + * The format allows commas and whitespases
> > + * at the beginning of the region, so skip it.
> > + */
> > + if (!isspace(c) && c != ',')
> > + break;
> > + }
> > +
> > + return str;
> > +}
> > +
> > +static const char *bitmap_parse_region(struct region *r, const char *str,
> > + const char *const end, int is_user)
> > +{
> > + long ret;
> > + char c;
> > +
> > + str = bitmap_getnum(&r->start, str, end, is_user);
> > + if (IS_ERR(str))
> > + return str;
> > +
> > + ret = get_char(&c, str, is_user);
> > + if (ret)
> > + return (char *)ret;
> > +
> > + if (c == 0 || c == '\n') {
> > + str = end + 1;
> > + goto no_end;
> > + }
> > +
> > + if (isspace(c) || c == ',')
> > + goto no_end;
> > +
> > + if (c != '-')
> > + return (char *)-EINVAL;
> > +
> > + str = bitmap_getnum(&r->end, str + 1, end, is_user);
> > + if (IS_ERR(str))
> > + return str;
> > +
> > + ret = get_char(&c, str, is_user);
> > + if (ret)
> > + return (char *)ret;
> > +
> > + if (c == 0 || c == '\n') {
> > + str = end + 1;
> > + goto no_pattern;
> > + }
> > +
> > + if (isspace(c) || c == ',')
> > + goto no_pattern;
> > +
> > + if (c != ':')
> > + return (char *)-EINVAL;
> > +
> > + str = bitmap_getnum(&r->off, str + 1, end, is_user);
> > + if (IS_ERR(str))
> > + return str;
> > +
> > + ret = get_char(&c, str, is_user);
> > + if (ret)
> > + return (char *)ret;
> > +
> > + if (c != '/')
> > + return (char *)-EINVAL;
> > +
> > + str = bitmap_getnum(&r->grlen, str + 1, end, is_user);
> > +
> > + return str;
> > +
> > +no_end:
> > + r->end = r->start;
> > +no_pattern:
> > + r->off = r->end + 1;
> > + r->grlen = r->end + 1;
> > +
> > + return (char *)str;

This cast looks unneeded.

> > +}
> > +
> > /**
> > * __bitmap_parselist - convert list format ASCII string to bitmap
> > * @buf: read nul-terminated user string from this buffer
> > @@ -534,113 +668,28 @@ static int bitmap_check_region(const struct region *r)
> > *
> > * Returns: 0 on success, -errno on invalid input strings. Error values:
> > *
> > - * - ``-EINVAL``: second number in range smaller than first
> > + * - ``-EINVAL``: wrong region format
> > * - ``-EINVAL``: invalid character in string
> > * - ``-ERANGE``: bit number specified too large for mask
> > + * - ``-EOVERFLOW``: integer overflow in the input parameters
> > */
> > -static int __bitmap_parselist(const char *buf, unsigned int buflen,
> > +static int __bitmap_parselist(const char *buf, const char *const end,
> > int is_user, unsigned long *maskp,
> > int nmaskbits)
> > {
> > - unsigned int a, b, old_a, old_b;
> > - unsigned int group_size, used_size;
> > - int c, old_c, totaldigits, ndigits;
> > - const char __user __force *ubuf = (const char __user __force *)buf;
> > - int at_start, in_range, in_partial_range, ret;
> > struct region r;
> > + long ret;
> >
> > - totaldigits = c = 0;
> > - old_a = old_b = 0;
> > - group_size = used_size = 0;
> > bitmap_zero(maskp, nmaskbits);
> > - do {
> > - at_start = 1;
> > - in_range = 0;
> > - in_partial_range = 0;
> > - a = b = 0;
> > - ndigits = totaldigits;
> > -
> > - /* Get the next cpu# or a range of cpu#'s */
> > - while (buflen) {
> > - old_c = c;
> > - if (is_user) {
> > - if (__get_user(c, ubuf++))
> > - return -EFAULT;
> > - } else
> > - c = *buf++;
> > - buflen--;
> > - if (isspace(c))
> > - continue;
> > -
> > - /* A '\0' or a ',' signal the end of a cpu# or range */
> > - if (c == '\0' || c == ',')
> > - break;
> > - /*
> > - * whitespaces between digits are not allowed,
> > - * but it's ok if whitespaces are on head or tail.
> > - * when old_c is whilespace,
> > - * if totaldigits == ndigits, whitespace is on head.
> > - * if whitespace is on tail, it should not run here.
> > - * as c was ',' or '\0',
> > - * the last code line has broken the current loop.
> > - */
> > - if ((totaldigits != ndigits) && isspace(old_c))
> > - return -EINVAL;
> > -
> > - if (c == '/') {
> > - used_size = a;
> > - at_start = 1;
> > - in_range = 0;
> > - a = b = 0;
> > - continue;
> > - }
> > -
> > - if (c == ':') {
> > - old_a = a;
> > - old_b = b;
> > - at_start = 1;
> > - in_range = 0;
> > - in_partial_range = 1;
> > - a = b = 0;
> > - continue;
> > - }
> > -
> > - if (c == '-') {
> > - if (at_start || in_range)
> > - return -EINVAL;
> > - b = 0;
> > - in_range = 1;
> > - at_start = 1;
> > - continue;
> > - }
> >
> > - if (!isdigit(c))
> > - return -EINVAL;
> > -
> > - b = b * 10 + (c - '0');
> > - if (!in_range)
> > - a = b;
> > - at_start = 0;
> > - totaldigits++;
> > - }
> > - if (ndigits == totaldigits)
> > - continue;
> > - if (in_partial_range) {
> > - group_size = a;
> > - a = old_a;
> > - b = old_b;
> > - old_a = old_b = 0;
> > - } else {
> > - used_size = group_size = b - a + 1;
> > - }
> > - /* if no digit is after '-', it's wrong*/
> > - if (at_start && in_range)
> > - return -EINVAL;
> > + while (buf && buf < end) {
> > + buf = bitmap_find_region(buf, end, is_user);
> > + if (buf == NULL)
> > + return 0;
> >
> > - r.start = a;
> > - r.off = used_size;
> > - r.grlen = group_size;
> > - r.end = b;
> > + buf = bitmap_parse_region(&r, buf, end, is_user);
> > + if (IS_ERR(buf))
> > + return (long)buf;
> >
> > ret = bitmap_check_region(&r);
> > if (ret)
> > @@ -649,14 +698,14 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
> > ret = bitmap_set_region(&r, maskp, nmaskbits);
> > if (ret)
> > return ret;
> > + }
> >
> > - } while (buflen && c == ',');
> > return 0;
> > }
> >
> > int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
> > {
> > - return __bitmap_parselist(bp, UINT_MAX, 0, maskp, nmaskbits);
> > + return __bitmap_parselist(bp, (char *)SIZE_MAX, 0, maskp, nmaskbits);
> > }
> > EXPORT_SYMBOL(bitmap_parselist);
> >
> > @@ -683,7 +732,7 @@ int bitmap_parselist_user(const char __user *ubuf,
> > if (!access_ok(VERIFY_READ, ubuf, ulen))
> > return -EFAULT;
> > return __bitmap_parselist((const char __force *)ubuf,
> > - ulen, 1, maskp, nmaskbits);
> > + ubuf + ulen - 1, 1, maskp, nmaskbits);
> > }
> > EXPORT_SYMBOL(bitmap_parselist_user);
> >
> > --
> > 2.17.1
> >
>

Thanks,
Yury