Re: [PATCH 1/2] lib/string_helpers: Introduce strsplit_u32()

From: Péter Ujfalusi
Date: Fri Jul 08 2022 - 08:34:59 EST




On 08/07/2022 15:30, Andy Shevchenko wrote:
> On Fri, Jul 08, 2022 at 02:13:14PM +0200, Cezary Rojewski wrote:
>> On 2022-07-08 1:46 PM, Andy Shevchenko wrote:
>>> On Fri, Jul 8, 2022 at 1:33 PM Cezary Rojewski
>>> <cezary.rojewski@xxxxxxxxx> wrote:
>
> ...
>
>>>> When I'd written the very first version of this function many months
>>>> ago, get_options() looked as it does not fulfill our needs. It seems to
>>>> be true even today: caller needs to know the number of elements in an
>>>> array upfront.
>>>
>>> Have you read a kernel doc for it? It does return the number of
>>> elements at the first pass.
>>
>> Yes, I've checked several parts of it. Perhaps I did miss something but
>> simple_strtoull() doc reads: use kstrtox() instead.
>
> Doc was fixed to make clearer that in some cases it's okay to use
> simple_strtox(). And this was exactly due to obfuscation code with this
> recommendation. Yes, in general one supposed to use kstrtox(), but it's
> not 100% obligatory.
>
>> Thus the strsplit_u32()
>> makes use of kstrtox().
>
> Yeah...
>
>>>> Also, kstrtox() takes into account '0x' and modifies the
>>>> base accordingly if that's the case. simple_strtoull() looks as not
>>>> capable of doing the same thing.
>>>
>>> How come?! It does parse all known prefixes: 0x, 0, +, -.
>>
>> Hmm.. doc says that it stops at the first non-digit character. Will
>> re-check.
>
> Yes, but under non-digit implies the standard prefixes of digits.
> simple_strtox() and kstrotox() use the very same function for prefixes.
>
>>>> The goal is to be able to parse input such as:
>>>>
>>>> 0x1000003,0,0,0x1000004,0,0
>>>>
>>>> into a sequence of 6 uints, filling the *tkns and *num_tkns for the caller.
>>>
>>> Yes. Have you checked the test cases for get_options()?
>
> (1)
>
> ...
>
>>>> avs-driver, which is also part of the ASoC framework has very similar
>>>> debug-interface. I believe there's no need to duplicate the functions -
>>>> move them to common code instead.
>>>
>>> Taking the above into account, please try to use get_options() and
>>> then tell me what's not working with it. If so, we will add test cases
>>> to get_options() and fix it.
>>
>> There is a difference:
>>
>> // get_options
>> int ints[5];
>>
>> s = get_options(str, ARRAY_SIZE(ints), ints);
>>
>> // strsplit_u32()
>> u32 *tkns, num_tkns;
>>
>> ret = strsplit_u32(str, delim, &tkns, &num_tkns);
>>
>> Nothing has been told upfront for in the second case.
>
> It seems you are missing the (1). The code has checks for the case where you
> can do get number upfront, it would just require two passes, but it's nothing
> in comparison of heave realloc().
>
> unsigned int *tokens;
> char *p;
> int num;
>
> p = get_options(str, 0, &num);
> if (num == 0)
> // No numbers in the string!
>
> tokens = kcalloc(num + 1, sizeof(*tokens), GFP_KERNEL);
> if (!tokens)
> return -ENOMEM;
>
> p = get_oprions(str, num, &tokens);
> if (*p)
> // String was parsed only partially!
> // assuming it's not a fatal error
>
> return tokens;
>

This diff is tested and works:
diff --git a/sound/soc/sof/sof-client-probes.c b/sound/soc/sof/sof-client-probes.c
index 60e4250fac87..48d405f78a83 100644
--- a/sound/soc/sof/sof-client-probes.c
+++ b/sound/soc/sof/sof-client-probes.c
@@ -410,61 +410,11 @@ static const struct snd_compress_ops sof_probes_compressed_ops = {
.copy = sof_probes_compr_copy,
};

-/**
- * strsplit_u32 - Split string into sequence of u32 tokens
- * @buf: String to split into tokens.
- * @delim: String containing delimiter characters.
- * @tkns: Returned u32 sequence pointer.
- * @num_tkns: Returned number of tokens obtained.
- */
-static int strsplit_u32(char *buf, const char *delim, u32 **tkns, size_t *num_tkns)
-{
- char *s;
- u32 *data, *tmp;
- size_t count = 0;
- size_t cap = 32;
- int ret = 0;
-
- *tkns = NULL;
- *num_tkns = 0;
- data = kcalloc(cap, sizeof(*data), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- while ((s = strsep(&buf, delim)) != NULL) {
- ret = kstrtouint(s, 0, data + count);
- if (ret)
- goto exit;
- if (++count >= cap) {
- cap *= 2;
- tmp = krealloc(data, cap * sizeof(*data), GFP_KERNEL);
- if (!tmp) {
- ret = -ENOMEM;
- goto exit;
- }
- data = tmp;
- }
- }
-
- if (!count)
- goto exit;
- *tkns = kmemdup(data, count * sizeof(*data), GFP_KERNEL);
- if (!(*tkns)) {
- ret = -ENOMEM;
- goto exit;
- }
- *num_tkns = count;
-
-exit:
- kfree(data);
- return ret;
-}
-
static int tokenize_input(const char __user *from, size_t count,
loff_t *ppos, u32 **tkns, size_t *num_tkns)
{
+ int ret, nints, i, *ints;
char *buf;
- int ret;

buf = kmalloc(count + 1, GFP_KERNEL);
if (!buf)
@@ -473,12 +423,36 @@ static int tokenize_input(const char __user *from, size_t count,
ret = simple_write_to_buffer(buf, count, ppos, from, count);
if (ret != count) {
ret = ret >= 0 ? -EIO : ret;
- goto exit;
+ goto free_buf;
}

buf[count] = '\0';
- ret = strsplit_u32(buf, ",", tkns, num_tkns);
-exit:
+ get_options(buf, 0, &nints);
+ if (!nints) {
+ ret = -ENOENT;
+ goto free_buf;
+ }
+
+ ints = kcalloc(nints + 1, sizeof(*ints), GFP_KERNEL);
+ if (!ints) {
+ ret = -ENOMEM;
+ goto free_buf;
+ }
+
+ *tkns = kcalloc(nints, sizeof(u32), GFP_KERNEL);
+ if (!(*tkns)) {
+ ret = -ENOMEM;
+ goto free_ints;
+ }
+
+ get_options(buf, nints + 1, ints);
+ *num_tkns = nints;
+ for (i = 0; i < nints; i++)
+ (*tkns)[i] = ints[i + 1];
+
+free_ints:
+ kfree(ints);
+free_buf:
kfree(buf);
return ret;
}

Could be made nicer with some brain work put to it, we need strict u32 within the IPC message for the array...

--
Péter